bitsparrow/
lib.rs

1//! For implementations in other languages, and more detailed
2//! information on the types check out http://bitsparrow.io/.
3//!
4//! # BitSparrow in Rust
5//!
6//! ## Encoding
7//!
8//! ```
9//! use bitsparrow::Encoder;
10//!
11//! let buffer = Encoder::new()
12//!              .uint8(100)
13//!              .string("Foo")
14//!              .end();
15//!
16//! assert_eq!(buffer, &[0x64,0x03,0x46,0x6f,0x6f])
17//! ```
18//!
//! Each method on the `Encoder` will return a mutable borrow of
//! the encoder. If you need to break the method chain, store the
//! owned encoder as a variable before writing to it, e.g.:
22//!
23//! ```
24//! use bitsparrow::Encoder;
25//!
26//! let mut encoder = Encoder::new();
27//! encoder.uint8(100);
28//!
29//! /*
30//!  * Many codes here
31//!  */
32//!
33//! let buffer = encoder.string("Foo").end();
34//!
35//! assert_eq!(buffer, &[0x64_u8,0x03,0x46,0x6f,0x6f]);
36//! ```
37//!
38//! ## Decoding
39//!
40//! ```
41//! use bitsparrow::Decoder;
42//!
43//! let buffer = &[0x64,0x03,0x46,0x6f,0x6f];
44//! let mut decoder = Decoder::new(buffer);
45//!
46//! assert_eq!(100u8, decoder.uint8().unwrap());
47//! assert_eq!("Foo", decoder.string().unwrap());
48//! assert_eq!(true, decoder.end());
49//! ```
50//!
//! Decoder allows you to retrieve the values in the order they were
//! encoded. Calling the `end` method is optional - it returns `true`
//! once the entire buffer has been read, which lets you verify that
//! no trailing bytes were left unread.
55
56use std::{ mem, fmt, error, str, ptr };
57
/// Simple error type returned by the fallible methods of the `Decoder`.
///
/// The variants carry no payload, so the type is cheap to copy and can be
/// compared directly (for example in tests or `match` guards).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The bytes read for a `string` were not valid UTF-8.
    Utf8Encoding,
    /// A read required more bytes than were left in the buffer.
    ReadingOutOfBounds,
}
64
65impl error::Error for Error {
66    fn description(&self) -> &str {
67        match *self {
68            Error::Utf8Encoding       => "Couldn't decode UTF-8 string",
69            Error::ReadingOutOfBounds => "Attempted to read out of bounds",
70        }
71    }
72}
73
74impl fmt::Display for Error {
75    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
76        write!(f, "{}", error::Error::description(self))
77    }
78}
79
/// Prefix masks for the first byte of an encoded `size`.
///
/// Entry `n` has its `n` most significant bits set and all lower bits
/// clear. `Encoder::size` ORs one of these into the leading byte, and
/// `Decoder::size` XORs the same mask away to recover the value bits.
static SIZE_MASKS: [u8; 9] = [
    0x00, // 0000_0000
    0x80, // 1000_0000
    0xC0, // 1100_0000
    0xE0, // 1110_0000
    0xF0, // 1111_0000
    0xF8, // 1111_1000
    0xFC, // 1111_1100
    0xFE, // 1111_1110
    0xFF, // 1111_1111
];
91
/// Encoder takes in typed data and produces a binary buffer
/// represented as `Vec<u8>`.
#[derive(Debug, Clone)]
pub struct Encoder {
    /// Bytes written so far.
    data: Vec<u8>,
    /// Length `data` had right after the byte currently collecting `bool`
    /// bits was pushed; `usize::MAX` while no such byte exists.
    bool_index: usize,
    /// Bit position (0-7) of the most recently stored `bool` in that byte.
    bool_shift: u8,
}
99
/// Appends the big-endian bytes of the integer variable `$value` to the
/// `Vec<u8>` expression `$data`.
macro_rules! write_bytes {
    ($data:expr, $value:ident) => ({
        // Keep the byte-swapped value in a named local: a pointer taken to
        // the temporary `$value.to_be()` would dangle as soon as the
        // statement creating it ends.
        let be_value = $value.to_be();
        let size = mem::size_of_val(&be_value);

        let len = $data.len();
        $data.reserve(size);

        // SAFETY: `reserve` guarantees room for `size` more bytes past
        // `len`, `be_value` is a live local that is exactly `size` bytes
        // long, and the two regions cannot overlap. The length is only
        // bumped after the new bytes have been written.
        unsafe {
            ptr::copy_nonoverlapping(
                &be_value as *const _ as *const u8,
                $data.as_mut_ptr().offset(len as isize),
                size
            );
            $data.set_len(len + size);
        }
    })
}
118
119impl Encoder {
120    /// Create a new instance of the `Encoder`.
121    #[inline]
122    pub fn new() -> Encoder {
123        Encoder {
124            data: Vec::new(),
125            bool_index: std::usize::MAX,
126            bool_shift: 0,
127        }
128    }
129
130    /// Create a new instance of the `Encoder` with a preallocated buffer capacity.
131    #[inline]
132    pub fn with_capacity(capacity: usize) -> Encoder {
133        Encoder {
134            data: Vec::with_capacity(capacity),
135            bool_index: std::usize::MAX,
136            bool_shift: 0,
137        }
138    }
139
140    /// Store a `u8` on the buffer.
141    #[inline]
142    pub fn uint8(&mut self, uint8: u8) -> &mut Encoder {
143        self.data.push(uint8);
144
145        self
146    }
147
148    /// Store a 'u16' on the buffer.
149    #[inline]
150    pub fn uint16(&mut self, uint16: u16) -> &mut Encoder {
151        write_bytes!(self.data, uint16);
152
153        self
154    }
155
156    /// Store a 'u32' on the buffer.
157    #[inline]
158    pub fn uint32(&mut self, uint32: u32) -> &mut Encoder {
159        write_bytes!(self.data, uint32);
160
161        self
162    }
163
164    /// Store a 'u64' on the buffer.
165    #[inline]
166    pub fn uint64(&mut self, uint64: u64) -> &mut Encoder {
167        write_bytes!(self.data, uint64);
168
169        self
170    }
171
172    /// Store an `i8` on the buffer.
173    #[inline]
174    pub fn int8(&mut self, int8: i8) -> &mut Encoder {
175        self.data.push(int8 as u8);
176
177        self
178    }
179
180    /// Store an `i16` on the buffer.
181    #[inline]
182    pub fn int16(&mut self, int16: i16) -> &mut Encoder {
183        write_bytes!(self.data, int16);
184
185        self
186    }
187
188    #[inline]
189    /// Store an `i32` on the buffer.
190    pub fn int32(&mut self, int32: i32) -> &mut Encoder {
191        write_bytes!(self.data, int32);
192
193        self
194    }
195
196    #[inline]
197    /// Store an `i32` on the buffer.
198    pub fn int64(&mut self, int64: i64) -> &mut Encoder {
199        write_bytes!(self.data, int64);
200
201        self
202    }
203
204    /// Store a `float32` on the buffer.
205    #[inline]
206    pub fn float32(&mut self, float32: f32) -> &mut Encoder {
207        self.uint32(unsafe { mem::transmute(float32) })
208    }
209
210    /// Store a `float64` on the buffer.
211    #[inline]
212    pub fn float64(&mut self, float64: f64) -> &mut Encoder {
213        self.uint64(unsafe { mem::transmute(float64) })
214    }
215
216    /// Store a `bool` on the buffer. Calling `bool` multiple times
217    /// in a row will attempt to store the information on a single
218    /// byte.
219    ///
220    /// ```
221    /// use bitsparrow::Encoder;
222    ///
223    /// let buffer = Encoder::new()
224    ///              .bool(true)
225    ///              .bool(false)
226    ///              .bool(false)
227    ///              .bool(false)
228    ///              .bool(false)
229    ///              .bool(true)
230    ///              .bool(true)
231    ///              .bool(true)
232    ///              .end();
233    ///
234    /// // booleans are stacked as bits on a single byte, right to left.
235    /// assert_eq!(buffer, &[0b11100001]);
236    /// ```
237    #[inline]
238    pub fn bool(&mut self, bool: bool) -> &mut Encoder {
239        let bool_bit: u8 = if bool { 1 } else { 0 };
240        let index = self.data.len();
241
242        if self.bool_index == index && self.bool_shift < 7 {
243            self.bool_shift += 1;
244            self.data[index - 1] = self.data[index - 1] | bool_bit << self.bool_shift;
245            return self;
246        }
247
248        self.bool_index = index + 1;
249        self.bool_shift = 0;
250
251        self.uint8(bool_bit)
252    }
253
254    /// Store a `usize` on the buffer. This will use a variable amount of bytes
255    /// depending on the value of `usize`, making it a very powerful and flexible
256    /// type to send around. BitSparrow uses `size` internally to prefix `string`
257    /// and `bytes` as those can have an arbitrary length, and using a large
258    /// number type such as u32 could be an overkill if all you want to send is
259    /// `"Foo"`. Detailed explanation on how BitSparrow stores `size` can be found
260    /// on [the homepage](http://bitsparrow.io).
261    #[inline]
262    pub fn size(&mut self, size: usize) -> &mut Encoder {
263        if size < 128 {
264            return self.uint8(size as u8);
265        }
266
267        let mut size = size as u64;
268
269        let lead = size.leading_zeros() as usize;
270        let bytes = if lead == 0 { 9 } else { 9 - (lead - 1) / 7 };
271
272        let mut buf: [u8; 9] = unsafe { mem::uninitialized() };
273
274        for i in (1 .. bytes).rev() {
275            buf[i] = size as u8;
276            size >>= 8;
277        }
278        buf[0] = (size as u8) | SIZE_MASKS[bytes - 1];
279
280        self.data.extend_from_slice(&buf[0 .. bytes]);
281
282        self
283    }
284
285    /// Store an arbitary collection of bytes represented as `&[u8]`,
286    /// easy to use by dereferencing `Vec<u8>` with `&`.
287    #[inline]
288    pub fn bytes(&mut self, bytes: &[u8]) -> &mut Encoder {
289        self.size(bytes.len());
290        self.data.extend_from_slice(bytes);
291
292        self
293    }
294
295    /// Store an arbitrary UTF-8 Rust string on the buffer.
296    #[inline]
297    pub fn string(&mut self, string: &str) -> &mut Encoder {
298        self.size(string.len());
299        self.data.extend_from_slice(string.as_bytes());
300
301        self
302    }
303
304    /// Finish encoding, obtain the buffer and reset the encoder.
305    #[inline]
306    pub fn end(&mut self) -> Vec<u8> {
307        self.bool_index = std::usize::MAX;
308        self.bool_shift = 0;
309
310        mem::replace(&mut self.data, Vec::new())
311    }
312}
313
314
/// Decoder reads from a binary slice buffer (`&[u8]`) and exposes
/// methods to read BitSparrow types from it in the same order they
/// were encoded by the `Encoder`.
#[derive(Debug, Clone)]
pub struct Decoder<'a> {
    /// Offset of the next unread byte in `data`.
    index: usize,
    /// The buffer being decoded.
    data: &'a [u8],
    /// Value `index` had right after the byte currently being unpacked
    /// into `bool`s was read; `usize::MAX` while no such byte exists.
    bool_index: usize,
    /// Bit position (0-7) of the most recently read `bool` in that byte.
    bool_shift: u8,
}
324
/// Reads one big-endian integer of type `$t` from `$decoder.data` at
/// `$decoder.index` and advances the index past it.
///
/// Evaluates to `Ok(value)`. If fewer than `size_of::<$t>()` bytes are
/// left, it makes the *enclosing function* return
/// `Err(Error::ReadingOutOfBounds)` and leaves the index untouched.
macro_rules! read_bytes {
    ($decoder:expr, $t:ident) => ({
        let size = mem::size_of::<$t>();
        let end = $decoder.index + size;
        if end > $decoder.data.len() {
            return Err(Error::ReadingOutOfBounds);
        }

        // Start from a fully initialised value; every byte of it is
        // overwritten by the copy below.
        let mut value: $t = 0;

        // SAFETY: the bounds check above guarantees that `size` bytes
        // starting at `index` lie inside `data`; `value` is a live local
        // of exactly `size` bytes, and any bit pattern is a valid integer.
        unsafe {
            ptr::copy_nonoverlapping(
                $decoder.data.as_ptr().offset($decoder.index as isize),
                &mut value as *mut $t as *mut u8,
                size
            );
        }

        $decoder.index = end;

        Ok($t::from_be(value))
    })
}
349
350impl<'a> Decoder<'a> {
351    /// Create a new `Decoder` reading from a `&[u8]` slice buffer.
352    #[inline]
353    pub fn new(data: &[u8]) -> Decoder {
354        Decoder {
355            index: 0,
356            data: data,
357            bool_index: std::usize::MAX,
358            bool_shift: 0,
359        }
360    }
361
362    /// Read a `u8` from the buffer and progress the internal index.
363    #[inline]
364    pub fn uint8(&mut self) -> Result<u8, Error> {
365        if self.index >= self.data.len() {
366            return Err(Error::ReadingOutOfBounds);
367        }
368        let uint8 = self.data[self.index];
369        self.index += 1;
370        return Ok(uint8);
371    }
372
373    /// Read a `u16` from the buffer and progress the internal index.
374    #[inline]
375    pub fn uint16(&mut self) -> Result<u16, Error> {
376        read_bytes!(self, u16)
377    }
378
379    /// Read a `u32` from the buffer and progress the internal index.
380    #[inline]
381    pub fn uint32(&mut self) -> Result<u32, Error> {
382        read_bytes!(self, u32)
383    }
384
385    /// Read a `u64` from the buffer and progress the internal index.
386    #[inline]
387    pub fn uint64(&mut self) -> Result<u64, Error> {
388        read_bytes!(self, u64)
389    }
390
391    /// Read an `i8` from the buffer and progress the internal index.
392    #[inline]
393    pub fn int8(&mut self) -> Result<i8, Error> {
394        let uint8 = try!(self.uint8());
395
396        Ok(uint8 as i8)
397    }
398
399    /// Read an `i16` from the buffer and progress the internal index.
400    #[inline]
401    pub fn int16(&mut self) -> Result<i16, Error> {
402        read_bytes!(self, i16)
403    }
404
405    /// Read an `i32` from the buffer and progress the internal index.
406    #[inline]
407    pub fn int32(&mut self) -> Result<i32, Error> {
408        read_bytes!(self, i32)
409    }
410
411    /// Read an `i64` from the buffer and progress the internal index.
412    #[inline]
413    pub fn int64(&mut self) -> Result<i64, Error> {
414        read_bytes!(self, i64)
415    }
416
417    /// Read a `float32` from the buffer and progress the internal index.
418    #[inline]
419    pub fn float32(&mut self) -> Result<f32, Error> {
420        let uint32 = try!(self.uint32());
421
422        Ok(unsafe { mem::transmute(uint32) })
423    }
424
425    /// Read a `float64` from the buffer and progress the internal index.
426    #[inline]
427    pub fn float64(&mut self) -> Result<f64, Error> {
428        let uint64 = try!(self.uint64());
429
430        Ok(unsafe { mem::transmute(uint64) })
431    }
432
433    /// Read a `bool` from the buffer and progress the internal index. If
434    /// a `bool` was previously read from the buffer, calling `bool()`
435    /// on the `Decoder` again will read a boolean from the same index
436    /// without progressing, but instead shifting to read the next bit.
437    /// This behavior is symmetric to how the `Encoder` stores the `bool`s,
438    /// and is completely transparent when using the API.
439    ///
440    /// ```
441    /// use bitsparrow::Decoder;
442    ///
443    /// // Reading `bools` from a single byte.
444    /// let buffer = &[0b11100001];
445    /// let mut decoder = Decoder::new(buffer);
446    ///
447    /// assert_eq!(true, decoder.bool().unwrap());
448    /// assert_eq!(false, decoder.bool().unwrap());
449    /// assert_eq!(false, decoder.bool().unwrap());
450    /// assert_eq!(false, decoder.bool().unwrap());
451    /// assert_eq!(false, decoder.bool().unwrap());
452    /// assert_eq!(true, decoder.bool().unwrap());
453    /// assert_eq!(true, decoder.bool().unwrap());
454    /// assert_eq!(true, decoder.bool().unwrap());
455    ///
456    /// // Ensure we've read the entire buffer
457    /// assert_eq!(true, decoder.end());
458    /// ```
459    pub fn bool(&mut self) -> Result<bool, Error> {
460        if self.bool_index == self.index && self.bool_shift < 7 {
461            self.bool_shift += 1;
462            let bits = self.data[self.index - 1];
463            let bool_bit = 1 << self.bool_shift;
464            return Ok(bits & bool_bit == bool_bit);
465        }
466
467        let bits = try!(self.uint8());
468        self.bool_index = self.index;
469        self.bool_shift = 0;
470
471        Ok(bits & 1 == 1)
472    }
473
474    /// Read a `usize` from the buffer and progress the index. Detailed
475    /// explanation on how BitSparrow stores `size` can be found on
476    /// [the homepage](http://bitsparrow.io).
477    pub fn size(&mut self) -> Result<usize, Error> {
478        let high = try!(self.uint8());
479
480        // 1 byte (no signature)
481        if (high & 128) == 0 {
482            return Ok(high as usize);
483        }
484
485        let mut ext_bytes = (!high).leading_zeros() as usize;
486        let mut size = (high ^ SIZE_MASKS[ext_bytes]) as usize;
487
488        while ext_bytes != 0 {
489            ext_bytes -= 1;
490            size = (size << 8) | try!(self.uint8()) as usize;
491        }
492
493        Ok(size)
494    }
495
496    /// Read an arbitary sized binary data from the buffer and
497    /// progress the index.
498    ///
499    /// **Note:** BitSparrow internally prefixes `bytes` with
500    /// `size` so you don't have to worry about how many bytes
501    /// you need to read.
502    #[inline]
503    pub fn bytes(&mut self) -> Result<&[u8], Error> {
504        // Order of addition is important here!
505        // Calling `size` will modify the `index`.
506        let end = try!(self.size()) + self.index;
507
508        if end > self.data.len() {
509            return Err(Error::ReadingOutOfBounds);
510        }
511
512        let bytes = &self.data[self.index .. end];
513
514        self.index = end;
515
516        Ok(bytes)
517    }
518
519    /// Read an arbitary sized owned `String` from the buffer and
520    /// progress the index.
521    ///
522    /// **Note:** Analog to `bytes`, BitSparrow internally prefixes
523    /// `string` with `size` so you don't have to worry about how
524    /// many bytes you need to read.
525    #[inline]
526    pub fn string(&mut self) -> Result<&str, Error> {
527        str::from_utf8(try!(self.bytes())).map_err(|_| Error::Utf8Encoding)
528    }
529
530    /// Returns `true` if the entire buffer has been read, otherwise
531    /// returns `false`.
532    #[inline]
533    pub fn end(&self) -> bool {
534        self.index >= self.data.len()
535    }
536}