manual_serializer/
lib.rs

1//!
2//! This crate provides functionality for manual serialization and deserialization of
3//! data from `u8` buffers.  If you are looking for proper serialization libraries please
4//! consider using [`serde`](https://crates.io/crates/serde). This crate is useful for
5//! direct value extraction from memory buffers where you need to quickly extract values
6//! from structured data that may not be aligned.
7//! 
//! NOTE: The current implementation supports extraction only of primitives encoded with
//! little-endian encoding.
10//! 
11//! # Example
12//! 
13//! ```rust
14//! 
//! #[derive(Debug, Default)]
//! pub struct Header {
//!     pub magic : usize,
//!     pub version : u16,
//!     pub payload : Vec<u8>,
//! }
21//! 
22//! impl TrySerialize for Header {
23//!     type Error = Error;
24//!     fn try_serialize(&self, dest: &mut Serializer) -> Result<()> {
25//!         dest.try_align_u32()?;
26//!         dest.try_store_u16le(self.magic as u16)?;
27//!         dest.try_store_u16le(self.version)?;
28//!         dest.try_store_u32le(self.payload.len() as u32)?;
29//!         dest.try_store_u8_slice(&self.payload)?;
30//!         Ok(())
31//!     }
32//! }
33//! 
//! fn store() -> Result<()> {
//!     let mut dest = Serializer::new(4096);
//!     let header = Header::default();
//!     dest.try_store(&header)?;
//!     Ok(())
//! }
39//! 
40//! impl TryDeserialize for Header {
41//!     type Error = Error;
42//! 
43//!     fn try_deserialize(src: &mut Deserializer) -> Result<Header> {
44//!         src.try_align_u32()?;
45//! 
46//!         let magic = src.try_load_u16le()? as usize;
47//!         let version = src.try_load_u16le()?;
48//!         let payload_length = src.try_load_u32le()? as usize;
//!         let payload = src.try_load_u8_vec(payload_length)?;
50//! 
51//!         Ok(Header{magic, version, payload})
52//!     }
53//! }
54//! 
//! fn load(data: &[u8], offset: usize) -> Result<(u32,Header)>{
//!     let mut src = Deserializer::new(data);
//!     src.try_offset(offset)?;
//!     let signature = src.try_load_u32le()?;
//!     let header: Header = src.try_load()?;
//!     Ok((signature,header))
//! }
62//! ```
63//! 
64pub mod error;
65pub use error::Error;
66pub mod result;
67pub use result::Result;
68
/// Deserializer referring to an existing `u8` buffer.
///
/// Holds a borrowed byte slice together with a byte cursor marking the
/// position of the next read. Cloning is cheap (a slice reference and an
/// index) and yields an independent cursor over the same bytes.
#[derive(Debug, Clone)]
pub struct Deserializer<'data> {
    /// Borrowed input buffer.
    data: &'data [u8],
    /// Byte index of the next read within `data`.
    cursor: usize,
}
74
75impl<'data> Deserializer<'data> {
76    /// Create a new `Deserializer` referring the supplied `u8` buffer
77    pub fn new(data: &'data [u8]) -> Deserializer<'data> {
78        Deserializer {
79            data,
80            cursor: 0,
81        }
82    }
83
84    /// Get current byte position of the reader
85    pub fn cursor(&self) -> usize {
86        self.cursor
87    }
88
89    /// Get amount of bytes remaining for consumption
90    pub fn remaining(&self) -> usize {
91        self.data.len() - self.cursor
92    }
93
94    /// Advance the reader by `offset` bytes
95    pub fn try_offset(&mut self, offset: usize) -> Result<()> {
96        self.cursor += offset;
97        if self.cursor > self.data.len() {
98            return Err(format!("deserializer offset {offset} is out of bounds[0..{}]", self.data.len()).into());
99        }
100        Ok(())
101    }
102
103    /// Advance the cursor to ensure that current cursor position is 32-bit aligned
104    pub fn try_align_u32(&mut self) -> Result<()> {
105        self.try_align(4)?;
106        Ok(())
107    }
108    
109    /// Advance the cursor to ensure that current cursor position is 64-bit aligned
110    pub fn try_align_u64(&mut self) -> Result<()> {
111        self.try_align(8)?;
112        Ok(())
113    }
114
115    /// Advance the cursor to ensure its alignment is on the `align` byte boundary.
116    /// The following ensures that the cursor is on a 128-bit alignment:
117    /// ```
118    /// deser.try_align(16)?;
119    /// ```
120    pub fn try_align(&mut self, align: usize) -> Result<()> {
121        let offset = self.cursor % align;
122        self.try_offset(offset)?;
123        Ok(())
124    }
125
126    /// Set the cursor byte position to the given `cursor` value
127    pub fn try_set_cursor(&mut self, cursor: usize) -> Result<()> {
128        self.cursor = cursor;
129        if self.cursor > self.data.len() {
130            return Err(format!("deserializer cursor {cursor} is out of bounds[0..{}]", self.data.len()).into());
131        }
132        Ok(())
133    }
134
135    /// Try reading `Vec<u8>` buffer of the supplied `len` byte length.
136    pub fn try_load_u8_vec(&mut self, len: usize) -> Result<Vec<u8>> {
137        if self.cursor+len > self.data.len() {
138            return Err(format!("try_u8vec(): deserializer cursor {} is out of bounds[0..{}]",self.cursor+len, self.data.len()).into());
139        }
140        let mut vec: Vec<u8> = Vec::with_capacity(len);
141        vec.resize(len,0);
142        vec.copy_from_slice(&self.data[self.cursor..self.cursor+len]);
143        self.cursor += len;
144        Ok(vec)
145    }
146
147    /// Try reading `Vec<u16>` array of the supplied `len` elements.
148    /// The `u16` values are read as little-endian values.
149    pub fn try_load_u16le_vec(&mut self, len: usize) -> Result<Vec<u16>> {
150        let mut vec: Vec<u16> = Vec::with_capacity(len);
151        for _ in 0..len {
152            vec.push(self.try_load_u16le()?)
153        }
154        Ok(vec)
155    }
156
157    /// Try reading an array of `u16` little-endian values from a 
158    /// zero-terminated `u16` array and return it as a Rust `String`.  
159    /// This function is useful for reading windows `PCWSTR` zero-terminated 
160    /// strings into Rust strings.
161    pub fn try_load_utf16le_sz(&mut self) -> Result<String> {
162        let mut vec: Vec<u16> = Vec::new();
163        loop {
164            let v = self.try_load_u16le()?;
165            if v == 0 {
166                break;
167            }
168            vec.push(v);
169        }
170        Ok(String::from_utf16(&vec)?)
171    }
172
173    /// Load a u8 value
174    pub fn load_u8(&mut self) -> u8 {
175        let last = self.cursor+1;
176        let v = u8::from_le_bytes(self.data[self.cursor..last].try_into().unwrap());
177        self.cursor = last;
178        v
179    }
180
181    /// Try load a u8 value
182    pub fn try_load_u8(&mut self) -> Result<u8> {
183        let last = self.cursor+1;
184        let v = u8::from_le_bytes(self.data[self.cursor..last].try_into()?);
185        self.cursor = last;
186        Ok(v)
187    }
188
189    /// Load a u16 little-endian value
190    pub fn load_u16le(&mut self) -> u16 {
191        let last = self.cursor + 2;
192        let v = u16::from_le_bytes(self.data[self.cursor..last].try_into().unwrap());
193        self.cursor = last;
194        v
195    }
196
197    /// Try load a u16 little-endian value
198    pub fn try_load_u16le(&mut self) -> Result<u16> {
199        let last = self.cursor+2;
200        let v = u16::from_le_bytes(self.data[self.cursor..last].try_into()?);
201        self.cursor = last;
202        Ok(v)
203    }
204
205    /// Load a u32 little-endian value
206    pub fn load_u32le(&mut self) -> u32 {
207        let last = self.cursor+4;
208        let v = u32::from_le_bytes(self.data[self.cursor..last].try_into().unwrap());
209        self.cursor = last;
210        v
211    }
212
213    /// Try load a u32 little-endian value
214    pub fn try_load_u32le(&mut self) -> Result<u32> {
215        let last = self.cursor+4;
216        let v = u32::from_le_bytes(self.data[self.cursor..last].try_into()?);
217        self.cursor = last;
218        Ok(v)
219    }
220
221    /// Load a u64 little-endian value
222    pub fn load_u64le(&mut self) -> u64 {
223        let last = self.cursor+8;
224        let v = u64::from_le_bytes(self.data[self.cursor..last].try_into().unwrap());
225        self.cursor = last;
226        v
227    }
228
229    /// Try load a u64 little-endian value
230    pub fn try_load_u64le(&mut self) -> Result<u64> {
231        let last = self.cursor+8;
232        let v = u64::from_le_bytes(self.data[self.cursor..last].try_into()?);
233        self.cursor = last;
234        Ok(v)
235    }
236
237    /// Load a primitive implementing a [`Deserialize`] trait
238    pub fn load<S : Deserialize>(&mut self) -> S {
239        S::deserialize(self)
240    }
241
242    /// Try load a primitive implementing a [`Deserialize`] trait
243    pub fn try_load<S : TryDeserialize>(&mut self) -> std::result::Result<S,S::Error> {
244        S::try_deserialize(self)
245    }
246
247}
248
249/// TryDeserialize trait accepted by [`Deserializer::try_load`]
250pub trait TryDeserialize where Self : Sized {
251    type Error;
252    fn try_deserialize(dest:&mut Deserializer) -> std::result::Result<Self,Self::Error>;
253}
254
255/// Deserialize trait accepted by [`Deserializer::load`]
256pub trait Deserialize {
257    fn deserialize(dest:&mut Deserializer) -> Self;
258}
259
/// Serializer struct containing a serialization buffer.
/// Please note that this struct expects to have a sufficiently large buffer
/// to perform the ongoing serialization.
#[derive(Debug, Clone)]
pub struct Serializer {
    /// Backing buffer that serialized bytes are written into.
    data: Vec<u8>,
    /// Byte index of the next write within `data`.
    cursor: usize,
}
267
268/// Default implementation for [`Serializer`] that allocates 4096 byte buffer.
269impl Default for Serializer {
270    fn default() -> Serializer {
271        Serializer::new(4096)
272    }
273}
274
275impl Serializer {
276    /// Create a new [`Serializer`] struct with `len` byte buffer
277    pub fn new(len: usize) -> Serializer {
278        let mut data = Vec::with_capacity(len);
279        data.resize(len, 0);
280        Serializer {
281            data,
282            cursor: 0,
283        }
284    }
285
286    /// Returns the current byte length of the ongoing serialization (cursor position)
287    pub fn len(&self) -> usize {
288        self.cursor
289    }
290
291    /// Returns `Vec<u8>` of the currently serialized data
292    pub fn to_vec(&self) -> Vec<u8> {
293        self.data[0..self.cursor].to_vec()
294    }
295
296    /// Returns a slice `&[u8]` of the currently serialized data
297    pub fn as_slice<'slice>(&'slice self) -> &'slice [u8] {
298        &self.data[0..self.cursor]
299    }
300
301    /// Advance the cursor by `offset` bytes. Since the underlying
302    /// buffer is zero-initialized, skipped bytes will remain as zero.
303    pub fn offset(&mut self, offset: usize) -> &mut Self {
304        if self.cursor + offset >= self.len() {
305        }
306        self.cursor += offset; 
307        self
308    }
309    
310    /// Try advance the cursor by `offset` bytes. Since the underlying
311    /// buffer is zero-initialized, skipped bytes will remain as zero.
312    pub fn try_offset(&mut self, offset: usize) -> Result<&mut Self> {
313        if self.cursor + offset >= self.data.len() {
314            return Err(Error::TryOffsetError(offset,self.cursor,self.len()));
315        }
316        self.cursor += offset; 
317        Ok(self)
318    }
319
320    /// Advance the cursor by `offset` bytes while explicitly setting
321    /// skipped bytes to zero. This can be useful if manually positioning
322    /// cursor within the buffer and the repositioning can result in 
323    /// the buffer containing previously serialized data.
324    pub fn offset_with_zeros(&mut self, offset: usize) -> &mut Self {
325        for _ in 0..offset {
326            self.store_u8(0);
327        }
328        self
329    }
330
331    /// Try advance the cursor by `offset` bytes while explicitly setting
332    /// skipped bytes to zero. This can be useful if manually positioning
333    /// cursor within the buffer and the repositioning can result in 
334    /// the buffer containing previously serialized data.
335    pub fn try_offset_with_zeros(&mut self, offset: usize) -> Result<&mut Self> {
336        if self.cursor + offset >= self.data.len() {
337            return Err(Error::TryOffsetError(offset,self.cursor,self.len()));
338        }
339        for _ in 0..offset {
340            self.store_u8(0);
341        }
342        Ok(self)
343    }
344
345    /// Advance the cursor to ensure that the current cursor position
346    /// is on the 32-bit alignment boundary.
347    pub fn align_u32(&mut self) -> &mut Self {
348        let offset = self.cursor % 4;
349        self.offset(offset)
350    }
351
352    /// Try advance the cursor to ensure that the current cursor position
353    /// is on the 32-bit alignment boundary.
354    pub fn try_align_u32(&mut self) -> Result<&mut Self> {
355        let offset = self.cursor % 4;
356        self.try_offset(offset)
357    }
358
359    /// Advance the cursor to ensure that the current cursor position
360    /// is on the 64-bit alignment boundary.
361    pub fn align_u64(&mut self) -> &mut Self {
362        let offset = self.cursor % 8;
363        self.offset(offset)
364    }
365
366    /// Try advance the cursor to ensure that the current cursor position
367    /// is on the 64-bit alignment boundary.
368    pub fn try_align_u64(&mut self) -> Result<&mut Self> {
369        let offset = self.cursor % 8;
370        self.try_offset(offset)
371    }
372
373    /// Store a single `u8` value, advancing the cursor by 1 byte.
374    pub fn store_u8(&mut self, v: u8) -> &mut Self {
375        let last = self.cursor+1;
376        self.data[self.cursor..last].copy_from_slice(&v.to_le_bytes());
377        self.cursor = last;
378        self
379    }
380
381    /// Try store a single `u8` value, advancing the cursor by 1 byte.
382    pub fn try_store_u8(&mut self, v: u8) -> Result<&mut Self> {
383        if self.cursor + 1 >= self.data.len() {
384            return Err(Error::TryStoreError("u8",self.cursor,self.data.len()));
385        }
386        let last = self.cursor+1;
387        self.data[self.cursor..last].copy_from_slice(&v.to_le_bytes());
388        self.cursor = last;
389        Ok(self)
390    }
391
392    /// Store a `u16` value using little-endian encoding, advancing the cursor by 2 bytes.
393    pub fn store_u16le(&mut self, v: u16) -> &mut Self {
394        let last = self.cursor+2;
395        self.data[self.cursor..last].copy_from_slice(&v.to_le_bytes());
396        self.cursor = last;
397        self
398    }
399    
400    /// Try to store a `u16` value using little-endian encoding, advancing the cursor by 2 bytes.
401    pub fn try_store_u16le(&mut self, v: u16) -> Result<&mut Self> {
402        if self.cursor + 2 >= self.data.len() {
403            return Err(Error::TryStoreError("u16",self.cursor,self.data.len()));
404        }
405        let last = self.cursor+2;
406        self.data[self.cursor..last].copy_from_slice(&v.to_le_bytes());
407        self.cursor = last;
408        Ok(self)
409    }
410    
411    /// Store a `u32` value using little-endian encoding, advancing the cursor by 4 bytes.
412    pub fn store_u32le(&mut self, v: u32) -> &mut Self {
413        let last = self.cursor+4;
414        self.data[self.cursor..last].copy_from_slice(&v.to_le_bytes());
415        self.cursor = last;
416        self
417    }
418    
419    /// Try to store a `u32` value using little-endian encoding, advancing the cursor by 4 bytes.
420    pub fn try_store_u32le(&mut self, v: u32) -> Result<&mut Self> {
421        if self.cursor + 4 >= self.data.len() {
422            return Err(Error::TryStoreError("u32",self.cursor,self.data.len()));
423        }
424        let last = self.cursor+4;
425        self.data[self.cursor..last].copy_from_slice(&v.to_le_bytes());
426        self.cursor = last;
427        Ok(self)
428    }
429    
430    /// Store a `u64` value using little-endian encoding, advancing the cursor by 8 bytes.
431    pub fn store_u64le(&mut self, v: u64) -> &mut Self {
432        let last = self.cursor+8;
433        self.data[self.cursor..last].copy_from_slice(&v.to_le_bytes());
434        self.cursor = last;
435        self
436    }
437    
438    /// Try to store a `u64` value using little-endian encoding, advancing the cursor by 8 bytes.
439    pub fn try_store_u64le(&mut self, v: u64) -> Result<&mut Self> {
440        if self.cursor + 8 >= self.data.len() {
441            return Err(Error::TryStoreError("u64",self.cursor,self.data.len()));
442        }
443        let last = self.cursor+8;
444        self.data[self.cursor..last].copy_from_slice(&v.to_le_bytes());
445        self.cursor = last;
446        Ok(self)
447    }
448    
449    /// Try to store a Rust `String` as a zero-terminated sequence of `u16` 
450    /// little-endian encoded bytes. This is useful to serialize windows `PCWSTR` 
451    /// zero-terminated strings.
452    pub fn try_store_utf16le_sz(&mut self, text : &String) -> Result<&mut Self> {
453        let len = text.len()+1;
454        let mut vec: Vec<u16> = Vec::with_capacity(len);
455        for c in text.chars() {
456            // TODO - proper encoding
457            // let buf = [0;2];
458            // c.encode_utf16(&mut buf);
459            vec.push(c as u16);
460        }
461        vec.push(0);
462        // println!("text: {} vec: {:?}",text,vec);
463        self.try_store_u16le_slice(&vec)?;
464        Ok(self)
465    }
466
467    /// Try to store a `u8` slice
468    pub fn try_store_u8_slice(&mut self, vec : &[u8]) -> Result<&mut Self> {
469        let len = vec.len();
470        let last = self.cursor + len;
471        if last >= self.data.len() {
472            return Err(Error::TryStoreSliceError(len,self.cursor,self.data.len()));
473        }
474        let src = unsafe { std::mem::transmute(vec.as_ptr()) };
475        let dest = self.data[self.cursor..last].as_mut_ptr();
476        unsafe { std::ptr::copy(src,dest,len); }
477        self.cursor = last;
478        Ok(self)
479    }
480
481    /// Try to store a `u16` slice as a sequence of little-endian encoded `u16` values.
482    pub fn try_store_u16le_slice(&mut self, vec : &[u16]) -> Result<&mut Self> {
483        let src = unsafe { std::mem::transmute(vec.as_ptr()) };
484        let bytelen = vec.len()*2;
485        let last = self.cursor + bytelen;
486        if last >= self.data.len() {
487            return Err(Error::TryStoreSliceError(bytelen,self.cursor,self.data.len()));
488        }
489        let dest = self.data[self.cursor..last].as_mut_ptr();
490        unsafe { std::ptr::copy(src,dest,bytelen); }
491        self.cursor = last;
492        Ok(self)
493    }
494
495    /// Store a primitive implementing a [`Serialize`] trait
496    pub fn store<S : Serialize>(&mut self, s : &S) -> &mut Self {
497        s.serialize(self);
498        self
499    }
500    
501    /// Try store a primitive implementing a [`TrySerialize`] trait
502    pub fn try_store<S : TrySerialize>(&mut self, s : &S) -> std::result::Result<&mut Self,S::Error> {
503        s.try_serialize(self)?;
504        Ok(self)
505    }
506}
507
508/// TrySerialize trait accepted by the [`Serializer::try_store`]
509pub trait TrySerialize {
510    type Error;
511    fn try_serialize(&self, dest:&mut Serializer) -> std::result::Result<(),Self::Error>;
512}
513
514/// Serialize trait accepted by the [`Serializer::store`]
515pub trait Serialize {
516    fn serialize(&self, dest:&mut Serializer);
517}
518
519// helper functions
520
/// Store a `u64` little-endian value at the start of the supplied buffer,
/// returning the number of bytes written (always 8).
///
/// # Panics
/// Panics if `dest` is shorter than 8 bytes.
#[inline]
pub fn store_u64le(dest: &mut [u8], v: u64) -> usize {
    let bytes = v.to_le_bytes();
    dest[..bytes.len()].copy_from_slice(&bytes);
    bytes.len()
}
527
/// Store a `u32` little-endian value at the start of the supplied buffer,
/// returning the number of bytes written (always 4).
///
/// # Panics
/// Panics if `dest` is shorter than 4 bytes.
#[inline]
pub fn store_u32le(dest: &mut [u8], v: u32) -> usize {
    let bytes = v.to_le_bytes();
    dest[..bytes.len()].copy_from_slice(&bytes);
    bytes.len()
}
534
/// Store a `u16` little-endian value at the start of the supplied buffer,
/// returning the number of bytes written (always 2).
///
/// # Panics
/// Panics if `dest` is shorter than 2 bytes.
#[inline]
pub fn store_u16le(dest: &mut [u8], v: u16) -> usize {
    let bytes = v.to_le_bytes();
    dest[..bytes.len()].copy_from_slice(&bytes);
    bytes.len()
}
541
/// Store a `u8` value in the first byte of the supplied buffer,
/// returning the number of bytes written (always 1).
///
/// # Panics
/// Panics if `dest` is empty.
#[inline]
pub fn store_u8(dest: &mut [u8], v: u8) -> usize {
    // A single byte has no endianness; write it directly.
    dest[0] = v;
    1
}
548
549
/// Load a `u64` little-endian value from the first 8 bytes of the supplied buffer.
///
/// # Panics
/// Panics if `src` is shorter than 8 bytes.
#[inline]
pub fn load_u64le(src: &[u8]) -> u64 {
    let mut bytes = [0u8; 8];
    bytes.copy_from_slice(&src[..8]);
    u64::from_le_bytes(bytes)
}
555
/// Load a `u32` little-endian value from the first 4 bytes of the supplied buffer.
///
/// # Panics
/// Panics if `src` is shorter than 4 bytes.
#[inline]
pub fn load_u32le(src: &[u8]) -> u32 {
    let mut bytes = [0u8; 4];
    bytes.copy_from_slice(&src[..4]);
    u32::from_le_bytes(bytes)
}
561
/// Load a `u16` little-endian value from the first 2 bytes of the supplied buffer.
///
/// # Panics
/// Panics if `src` is shorter than 2 bytes.
#[inline]
pub fn load_u16le(src: &[u8]) -> u16 {
    let mut bytes = [0u8; 2];
    bytes.copy_from_slice(&src[..2]);
    u16::from_le_bytes(bytes)
}
567
/// Load a `u8` value from the first byte of the supplied buffer.
///
/// # Panics
/// Panics if `src` is empty.
#[inline]
pub fn load_u8(src: &[u8]) -> u8 {
    // A single byte has no endianness; read it directly.
    src[0]
}
573