bitsparrow/lib.rs
1//! For implementations in other languages, and more detailed
2//! information on the types check out http://bitsparrow.io/.
3//!
4//! # BitSparrow in Rust
5//!
6//! ## Encoding
7//!
8//! ```
9//! use bitsparrow::Encoder;
10//!
11//! let buffer = Encoder::new()
12//! .uint8(100)
13//! .string("Foo")
14//! .end();
15//!
16//! assert_eq!(buffer, &[0x64,0x03,0x46,0x6f,0x6f])
17//! ```
18//!
//! Each method on the `Encoder` returns a mutable borrow of the
//! encoder, so calls can be chained. If you need to break the method
//! chain, store the owned encoder in a variable before writing to it, e.g.:
22//!
23//! ```
24//! use bitsparrow::Encoder;
25//!
26//! let mut encoder = Encoder::new();
27//! encoder.uint8(100);
28//!
29//! /*
//! * More code here
31//! */
32//!
33//! let buffer = encoder.string("Foo").end();
34//!
35//! assert_eq!(buffer, &[0x64_u8,0x03,0x46,0x6f,0x6f]);
36//! ```
37//!
38//! ## Decoding
39//!
40//! ```
41//! use bitsparrow::Decoder;
42//!
43//! let buffer = &[0x64,0x03,0x46,0x6f,0x6f];
44//! let mut decoder = Decoder::new(buffer);
45//!
46//! assert_eq!(100u8, decoder.uint8().unwrap());
47//! assert_eq!("Foo", decoder.string().unwrap());
48//! assert_eq!(true, decoder.end());
49//! ```
50//!
//! Decoder allows you to retrieve the values in the order they were
//! encoded. Calling the `end` method is optional - it returns `true`
//! once the entire buffer has been read, which lets you verify that
//! no unread bytes are left over.
55
56use std::{ mem, fmt, error, str, ptr };
57
/// Simple error type returned by the `Decoder` when a value cannot be
/// read from the buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The bytes read for a string were not valid UTF-8.
    Utf8Encoding,
    /// A read required more bytes than the buffer has left.
    ReadingOutOfBounds,
}

impl Error {
    /// Human readable message for this error, shared by the `Display`
    /// and `std::error::Error` implementations.
    fn message(&self) -> &'static str {
        match *self {
            Error::Utf8Encoding => "Couldn't decode UTF-8 string",
            Error::ReadingOutOfBounds => "Attempted to read out of bounds",
        }
    }
}

impl error::Error for Error {
    // Kept for callers that still rely on `description`; `Display` no
    // longer goes through it.
    fn description(&self) -> &str {
        self.message()
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.message())
    }
}
79
/// Prefix masks used by the variable-length `size` encoding.
///
/// Entry `n` has its `n` most significant bits set. `Encoder::size` ORs
/// entry `bytes - 1` into the first byte it writes, and `Decoder::size`
/// counts the leading one bits of the first byte to find out how many
/// extra bytes follow, then XORs the same entry away to recover the
/// value bits stored in that first byte.
static SIZE_MASKS: [u8; 9] = [
    0x00, // no extra bytes
    0x80, // 1 extra byte
    0xC0, // 2 extra bytes
    0xE0, // 3 extra bytes
    0xF0, // 4 extra bytes
    0xF8, // 5 extra bytes
    0xFC, // 6 extra bytes
    0xFE, // 7 extra bytes
    0xFF, // 8 extra bytes
];
91
92/// Encoder takes in typed data and produces a binary buffer
93/// represented as `Vec<u8>`.
94pub struct Encoder {
95 data: Vec<u8>,
96 bool_index: usize,
97 bool_shift: u8,
98}
99
100macro_rules! write_bytes {
101 ($data:expr, $value:ident) => ({
102 unsafe {
103 let size = mem::size_of_val(&$value);
104 let ptr: *const u8 = mem::transmute(&$value.to_be());
105
106 let len = $data.len();
107 $data.reserve(size);
108 $data.set_len(len + size);
109
110 ptr::copy_nonoverlapping(
111 ptr,
112 $data.as_mut_ptr().offset(len as isize),
113 size
114 );
115 }
116 })
117}
118
119impl Encoder {
120 /// Create a new instance of the `Encoder`.
121 #[inline]
122 pub fn new() -> Encoder {
123 Encoder {
124 data: Vec::new(),
125 bool_index: std::usize::MAX,
126 bool_shift: 0,
127 }
128 }
129
130 /// Create a new instance of the `Encoder` with a preallocated buffer capacity.
131 #[inline]
132 pub fn with_capacity(capacity: usize) -> Encoder {
133 Encoder {
134 data: Vec::with_capacity(capacity),
135 bool_index: std::usize::MAX,
136 bool_shift: 0,
137 }
138 }
139
140 /// Store a `u8` on the buffer.
141 #[inline]
142 pub fn uint8(&mut self, uint8: u8) -> &mut Encoder {
143 self.data.push(uint8);
144
145 self
146 }
147
148 /// Store a 'u16' on the buffer.
149 #[inline]
150 pub fn uint16(&mut self, uint16: u16) -> &mut Encoder {
151 write_bytes!(self.data, uint16);
152
153 self
154 }
155
156 /// Store a 'u32' on the buffer.
157 #[inline]
158 pub fn uint32(&mut self, uint32: u32) -> &mut Encoder {
159 write_bytes!(self.data, uint32);
160
161 self
162 }
163
164 /// Store a 'u64' on the buffer.
165 #[inline]
166 pub fn uint64(&mut self, uint64: u64) -> &mut Encoder {
167 write_bytes!(self.data, uint64);
168
169 self
170 }
171
172 /// Store an `i8` on the buffer.
173 #[inline]
174 pub fn int8(&mut self, int8: i8) -> &mut Encoder {
175 self.data.push(int8 as u8);
176
177 self
178 }
179
180 /// Store an `i16` on the buffer.
181 #[inline]
182 pub fn int16(&mut self, int16: i16) -> &mut Encoder {
183 write_bytes!(self.data, int16);
184
185 self
186 }
187
188 #[inline]
189 /// Store an `i32` on the buffer.
190 pub fn int32(&mut self, int32: i32) -> &mut Encoder {
191 write_bytes!(self.data, int32);
192
193 self
194 }
195
196 #[inline]
197 /// Store an `i32` on the buffer.
198 pub fn int64(&mut self, int64: i64) -> &mut Encoder {
199 write_bytes!(self.data, int64);
200
201 self
202 }
203
204 /// Store a `float32` on the buffer.
205 #[inline]
206 pub fn float32(&mut self, float32: f32) -> &mut Encoder {
207 self.uint32(unsafe { mem::transmute(float32) })
208 }
209
210 /// Store a `float64` on the buffer.
211 #[inline]
212 pub fn float64(&mut self, float64: f64) -> &mut Encoder {
213 self.uint64(unsafe { mem::transmute(float64) })
214 }
215
216 /// Store a `bool` on the buffer. Calling `bool` multiple times
217 /// in a row will attempt to store the information on a single
218 /// byte.
219 ///
220 /// ```
221 /// use bitsparrow::Encoder;
222 ///
223 /// let buffer = Encoder::new()
224 /// .bool(true)
225 /// .bool(false)
226 /// .bool(false)
227 /// .bool(false)
228 /// .bool(false)
229 /// .bool(true)
230 /// .bool(true)
231 /// .bool(true)
232 /// .end();
233 ///
234 /// // booleans are stacked as bits on a single byte, right to left.
235 /// assert_eq!(buffer, &[0b11100001]);
236 /// ```
237 #[inline]
238 pub fn bool(&mut self, bool: bool) -> &mut Encoder {
239 let bool_bit: u8 = if bool { 1 } else { 0 };
240 let index = self.data.len();
241
242 if self.bool_index == index && self.bool_shift < 7 {
243 self.bool_shift += 1;
244 self.data[index - 1] = self.data[index - 1] | bool_bit << self.bool_shift;
245 return self;
246 }
247
248 self.bool_index = index + 1;
249 self.bool_shift = 0;
250
251 self.uint8(bool_bit)
252 }
253
254 /// Store a `usize` on the buffer. This will use a variable amount of bytes
255 /// depending on the value of `usize`, making it a very powerful and flexible
256 /// type to send around. BitSparrow uses `size` internally to prefix `string`
257 /// and `bytes` as those can have an arbitrary length, and using a large
258 /// number type such as u32 could be an overkill if all you want to send is
259 /// `"Foo"`. Detailed explanation on how BitSparrow stores `size` can be found
260 /// on [the homepage](http://bitsparrow.io).
261 #[inline]
262 pub fn size(&mut self, size: usize) -> &mut Encoder {
263 if size < 128 {
264 return self.uint8(size as u8);
265 }
266
267 let mut size = size as u64;
268
269 let lead = size.leading_zeros() as usize;
270 let bytes = if lead == 0 { 9 } else { 9 - (lead - 1) / 7 };
271
272 let mut buf: [u8; 9] = unsafe { mem::uninitialized() };
273
274 for i in (1 .. bytes).rev() {
275 buf[i] = size as u8;
276 size >>= 8;
277 }
278 buf[0] = (size as u8) | SIZE_MASKS[bytes - 1];
279
280 self.data.extend_from_slice(&buf[0 .. bytes]);
281
282 self
283 }
284
285 /// Store an arbitary collection of bytes represented as `&[u8]`,
286 /// easy to use by dereferencing `Vec<u8>` with `&`.
287 #[inline]
288 pub fn bytes(&mut self, bytes: &[u8]) -> &mut Encoder {
289 self.size(bytes.len());
290 self.data.extend_from_slice(bytes);
291
292 self
293 }
294
295 /// Store an arbitrary UTF-8 Rust string on the buffer.
296 #[inline]
297 pub fn string(&mut self, string: &str) -> &mut Encoder {
298 self.size(string.len());
299 self.data.extend_from_slice(string.as_bytes());
300
301 self
302 }
303
304 /// Finish encoding, obtain the buffer and reset the encoder.
305 #[inline]
306 pub fn end(&mut self) -> Vec<u8> {
307 self.bool_index = std::usize::MAX;
308 self.bool_shift = 0;
309
310 mem::replace(&mut self.data, Vec::new())
311 }
312}
313
314
315/// Decoder reads from a binary slice buffer (`&[u8]`) and exposes
316/// methods to read BitSparrow types from it in the same order they
317/// were encoded by the `Encoder`.
318pub struct Decoder<'a> {
319 index: usize,
320 data: &'a [u8],
321 bool_index: usize,
322 bool_shift: u8,
323}
324
325macro_rules! read_bytes {
326 ($decoder:expr, $t:ident) => ({
327 let size = mem::size_of::<$t>();
328 let end = $decoder.index + size;
329 if end > $decoder.data.len() {
330 return Err(Error::ReadingOutOfBounds);
331 }
332
333 unsafe {
334 let mut value: $t = mem::uninitialized();
335 let ptr: *mut u8 = mem::transmute(&mut value);
336
337 ptr::copy_nonoverlapping(
338 $decoder.data.as_ptr().offset($decoder.index as isize),
339 ptr,
340 size
341 );
342
343 $decoder.index = end;
344
345 Ok($t::from_be(value))
346 }
347 })
348}
349
350impl<'a> Decoder<'a> {
351 /// Create a new `Decoder` reading from a `&[u8]` slice buffer.
352 #[inline]
353 pub fn new(data: &[u8]) -> Decoder {
354 Decoder {
355 index: 0,
356 data: data,
357 bool_index: std::usize::MAX,
358 bool_shift: 0,
359 }
360 }
361
362 /// Read a `u8` from the buffer and progress the internal index.
363 #[inline]
364 pub fn uint8(&mut self) -> Result<u8, Error> {
365 if self.index >= self.data.len() {
366 return Err(Error::ReadingOutOfBounds);
367 }
368 let uint8 = self.data[self.index];
369 self.index += 1;
370 return Ok(uint8);
371 }
372
373 /// Read a `u16` from the buffer and progress the internal index.
374 #[inline]
375 pub fn uint16(&mut self) -> Result<u16, Error> {
376 read_bytes!(self, u16)
377 }
378
379 /// Read a `u32` from the buffer and progress the internal index.
380 #[inline]
381 pub fn uint32(&mut self) -> Result<u32, Error> {
382 read_bytes!(self, u32)
383 }
384
385 /// Read a `u64` from the buffer and progress the internal index.
386 #[inline]
387 pub fn uint64(&mut self) -> Result<u64, Error> {
388 read_bytes!(self, u64)
389 }
390
391 /// Read an `i8` from the buffer and progress the internal index.
392 #[inline]
393 pub fn int8(&mut self) -> Result<i8, Error> {
394 let uint8 = try!(self.uint8());
395
396 Ok(uint8 as i8)
397 }
398
399 /// Read an `i16` from the buffer and progress the internal index.
400 #[inline]
401 pub fn int16(&mut self) -> Result<i16, Error> {
402 read_bytes!(self, i16)
403 }
404
405 /// Read an `i32` from the buffer and progress the internal index.
406 #[inline]
407 pub fn int32(&mut self) -> Result<i32, Error> {
408 read_bytes!(self, i32)
409 }
410
411 /// Read an `i64` from the buffer and progress the internal index.
412 #[inline]
413 pub fn int64(&mut self) -> Result<i64, Error> {
414 read_bytes!(self, i64)
415 }
416
417 /// Read a `float32` from the buffer and progress the internal index.
418 #[inline]
419 pub fn float32(&mut self) -> Result<f32, Error> {
420 let uint32 = try!(self.uint32());
421
422 Ok(unsafe { mem::transmute(uint32) })
423 }
424
425 /// Read a `float64` from the buffer and progress the internal index.
426 #[inline]
427 pub fn float64(&mut self) -> Result<f64, Error> {
428 let uint64 = try!(self.uint64());
429
430 Ok(unsafe { mem::transmute(uint64) })
431 }
432
433 /// Read a `bool` from the buffer and progress the internal index. If
434 /// a `bool` was previously read from the buffer, calling `bool()`
435 /// on the `Decoder` again will read a boolean from the same index
436 /// without progressing, but instead shifting to read the next bit.
437 /// This behavior is symmetric to how the `Encoder` stores the `bool`s,
438 /// and is completely transparent when using the API.
439 ///
440 /// ```
441 /// use bitsparrow::Decoder;
442 ///
443 /// // Reading `bools` from a single byte.
444 /// let buffer = &[0b11100001];
445 /// let mut decoder = Decoder::new(buffer);
446 ///
447 /// assert_eq!(true, decoder.bool().unwrap());
448 /// assert_eq!(false, decoder.bool().unwrap());
449 /// assert_eq!(false, decoder.bool().unwrap());
450 /// assert_eq!(false, decoder.bool().unwrap());
451 /// assert_eq!(false, decoder.bool().unwrap());
452 /// assert_eq!(true, decoder.bool().unwrap());
453 /// assert_eq!(true, decoder.bool().unwrap());
454 /// assert_eq!(true, decoder.bool().unwrap());
455 ///
456 /// // Ensure we've read the entire buffer
457 /// assert_eq!(true, decoder.end());
458 /// ```
459 pub fn bool(&mut self) -> Result<bool, Error> {
460 if self.bool_index == self.index && self.bool_shift < 7 {
461 self.bool_shift += 1;
462 let bits = self.data[self.index - 1];
463 let bool_bit = 1 << self.bool_shift;
464 return Ok(bits & bool_bit == bool_bit);
465 }
466
467 let bits = try!(self.uint8());
468 self.bool_index = self.index;
469 self.bool_shift = 0;
470
471 Ok(bits & 1 == 1)
472 }
473
474 /// Read a `usize` from the buffer and progress the index. Detailed
475 /// explanation on how BitSparrow stores `size` can be found on
476 /// [the homepage](http://bitsparrow.io).
477 pub fn size(&mut self) -> Result<usize, Error> {
478 let high = try!(self.uint8());
479
480 // 1 byte (no signature)
481 if (high & 128) == 0 {
482 return Ok(high as usize);
483 }
484
485 let mut ext_bytes = (!high).leading_zeros() as usize;
486 let mut size = (high ^ SIZE_MASKS[ext_bytes]) as usize;
487
488 while ext_bytes != 0 {
489 ext_bytes -= 1;
490 size = (size << 8) | try!(self.uint8()) as usize;
491 }
492
493 Ok(size)
494 }
495
496 /// Read an arbitary sized binary data from the buffer and
497 /// progress the index.
498 ///
499 /// **Note:** BitSparrow internally prefixes `bytes` with
500 /// `size` so you don't have to worry about how many bytes
501 /// you need to read.
502 #[inline]
503 pub fn bytes(&mut self) -> Result<&[u8], Error> {
504 // Order of addition is important here!
505 // Calling `size` will modify the `index`.
506 let end = try!(self.size()) + self.index;
507
508 if end > self.data.len() {
509 return Err(Error::ReadingOutOfBounds);
510 }
511
512 let bytes = &self.data[self.index .. end];
513
514 self.index = end;
515
516 Ok(bytes)
517 }
518
519 /// Read an arbitary sized owned `String` from the buffer and
520 /// progress the index.
521 ///
522 /// **Note:** Analog to `bytes`, BitSparrow internally prefixes
523 /// `string` with `size` so you don't have to worry about how
524 /// many bytes you need to read.
525 #[inline]
526 pub fn string(&mut self) -> Result<&str, Error> {
527 str::from_utf8(try!(self.bytes())).map_err(|_| Error::Utf8Encoding)
528 }
529
530 /// Returns `true` if the entire buffer has been read, otherwise
531 /// returns `false`.
532 #[inline]
533 pub fn end(&self) -> bool {
534 self.index >= self.data.len()
535 }
536}