bipack_ru 0.4.4

binary size-effective format used in Divan smart contracts, wasm bindings, network protocols, etc.
Documentation
// Copyright 2023 by Sergey S. Chernov.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! # Bipack codec
//!
//! The set of tools to effectively encode and decode bipack values. It is intentionally
//! minimalistic so that it can be used with Divan smart contracts, where the number of
//! instructions could be important.
//!
//! - [bipack_source::BipackSource] is used to decode values; the
//!   [bipack_source::SliceSource] implementation parses a binary slice. The trait only needs
//!   a byte-read method to be implemented.
//!
//! - [bipack_sink::BipackSink] trait, which is also implemented for [`Vec<u8>`], allows encoding
//!   values into the bipack format. It is just as simple to implement it for any other binary
//!   data sink.
//!
//! ## Utilities
//!
//! - to simplify encoding of unsigned ints, the [bipack_sink::IntoU64] trait is used, with
//!   implementations for the usual `u*` types.
//!
//! - [tools::to_dump] utility function converts binary data into a human-readable dump, as in
//!   the good old days (address, bytes, ASCII characters).
//!
//! - [tools::StringBuilder] is a minimalistic growable string builder.
//!
//! ## About Bipack format
//!
//! This is a binary format created around the idea of bit-effectiveness and not disclosing
//! inner data structure. Unlike many known binary and text formats, like JSON, BSON, BOSS, and
//! many others, it does not include field names in packed binaries.
//!
//! It also uses a rationally packed variable-length format that is very effective for unsigned
//! integers of various sizes. This implementation supports sizes for u8, u16, u32 and u64. It is
//! capable of holding longer values too, but for big numbers the fixed-size encoding is mostly
//! more effective. This rational encoding format is called `smartint` and is used internally
//! everywhere one needs to pack an unsigned number, unless the fixed size is important.
//! 
//! ### Varint encoding
//! 
//! Smart variable-length long encoding tools, async. It gives a byte-size gain for 64-bit numbers,
//! so it is very useful when encoding big numbers or at least very big long values. In other cases
//! [bipack_sink::BipackSink::put_unsigned] works faster, and the extra bits it uses do not play
//! a significant role.
//!
//! | Bytes sz | varint bits | smartint bits |
//! |:-----:|:------:|:---------:|
//! |   1   |    7   |     6     |
//! |   2   |    14  |    14     |
//! |   3   |    21  |    22     |
//! |   4   |    28  |    29     |
//! |   5   |    35  |    36     |
//! |   6+  |    7*N |   7*N+1   |
//! |   9   |    63  |   64      |
//! |   10  |    64  |   ---     |
//!
//! In other words, except for very small numbers smartint
//! gives 1 data bit gain for the same packed byte size. For example,
//! full size 64 bits number with smartint takes one byte less (9 bytes vs. 10 in Varint).
//!
//! So, except for values in range 32..63 it gives same or better byte size effectiveness
//! than `Varint`. In particular:
//!
//! The effect of it could be interpreted as:
//!
//! | number values | size  |
//! |:--------------|:------:|
//! | 0..31 | same |
//! | 32..63 | worse 1 byte |
//! | 64..1048575 | same |
//! | 1048576..2097151 | 1 byte better |
//! | 2097152..134217727 | same |
//! | 134217728..268435456 | 1 byte better |
//!
//! etc.
//!
//! ## Encoding format
//!
//! Encoded data could be 1 or more bytes in length. Data are
//! packed as follows:
//!
//! | byte offset | bits range | field |
//! |-------------|------------|-------|
//! | 0 | 0..1 | type |
//! | 0 | 2..7 | v0 |
//! | 1 | 0..7 | v1 (when used) |
//! | 2 | 0..7 | v2 (when used) |
//!
//! Then depending on the `type` field:
//!
//! | type | encoded |
//! |------|---------|
//! | 0 | v0 is the result 0..64 (or -32..32) |
//! | 1 | v0 ## v1 are the result, 14 bits |
//! | 2 | v0 ## v1 ## v2 are the result, 22 bits |
//! | 3 | v0 ## v1 ## v2 ## (varint encoded rest) |
//!
//! Where `##` means bits concatenation. The bits are interpreted as BIG ENDIAN,
//! for example `24573` will be encoded to `EA FF 02`
//!
//!

// NOTE(review): these lint suppressions apply to the whole crate and the reason is not
// documented here — consider narrowing them to the items that actually need them.
#![allow(dead_code)]
#![allow(unused_variables)]

// Decoding side: the `BipackSource` trait and its `SliceSource` implementation.
pub mod bipack_source;
// Encoding side: the `BipackSink` trait (implemented for `Vec<u8>`) and `IntoU64`.
pub mod bipack_sink;
// Helpers such as `to_dump` and `StringBuilder`.
pub mod tools;
// Home of the `BiPackable` / `BiUnpackable` traits imported by the tests in this file.
pub mod bipack;
// NOTE(review): the modules below are not described in the crate-level docs above.
// `ser` / `de` presumably hold the serde serializer and deserializer and `error` their
// shared error type — verify against the module sources.
pub mod error;
pub mod ser;
pub mod de;
pub mod crc;
pub mod contrail;
pub mod fixint;
pub mod buffer_sink;

// Re-export serde's `Deserialize` and `Serialize` so they are reachable through this crate.
pub use serde::{Deserialize,Serialize};

#[cfg(test)]
mod tests {
    use base64::Engine;

    use crate::bipack;
    use crate::bipack::{BiPackable, BiUnpackable};
    use crate::bipack_sink::BipackSink;
    use crate::bipack_source::{BipackSource, Result, SliceSource};
    use crate::tools::to_dump;

    /// Decodes an unpadded, standard-alphabet base64 fixture into raw bytes.
    ///
    /// Panics with the message "decoded vector" when the fixture is not valid base64.
    fn decode_fixture(encoded: &str) -> Vec<u8> {
        let mut bytes = Vec::new();
        base64::engine::general_purpose::STANDARD_NO_PAD
            .decode_vec(encoded, &mut bytes)
            .expect("decoded vector");
        bytes
    }

    /// Reads fixed-size `u8`, `u16`, `u32` and `u64` values back from a pre-encoded fixture.
    #[test]
    fn fixed_unpack() -> Result<()> {
        let packed = decode_fixture("B/oAAAEB0AAAANjLgKAv");
        println!(": {}", hex::encode(&packed));
        let mut reader = SliceSource::from(&packed);
        assert_eq!(7, reader.get_u8()?);
        assert_eq!(64000, reader.get_u16()?);
        assert_eq!(66000, reader.get_u32()?);
        assert_eq!(931127140399, reader.get_u64()?);
        Ok(())
    }

    /// Reads one plain byte followed by packed 16-, 32- and 64-bit unsigned values from a
    /// pre-encoded fixture.
    #[test]
    fn smartint_unpack() -> Result<()> {
        let packed = decode_fixture("BwLoA0IHBL+AAq7GDQ");
        let mut reader = SliceSource::from(&packed);
        assert_eq!(7, reader.get_u8()?);
        assert_eq!(64000, reader.get_packed_u16()?);
        assert_eq!(66000, reader.get_packed_u32()?);
        assert_eq!(931127140399, reader.get_unsigned()?);
        Ok(())
    }

    /// Writes fixed-size unsigned values and compares the result with the expected hex dump.
    #[test]
    fn fixed_pack() {
        let mut sink = Vec::<u8>::new();
        sink.put_u8(7).unwrap();
        sink.put_u16(64000).unwrap();
        sink.put_u32(66000).unwrap();
        sink.put_u64(931127140399).unwrap();
        assert_eq!("07fa00000101d0000000d8cb80a02f", hex::encode(&sink));
    }

    /// Writes one plain byte and three values via `put_unsigned`, then compares the result
    /// with the expected hex dump.
    #[test]
    fn smart_pack() {
        let mut sink = Vec::<u8>::new();
        sink.put_u8(7).unwrap();
        sink.put_unsigned(64000u16).unwrap();
        sink.put_unsigned(66000u32).unwrap();
        sink.put_unsigned(931127140399u64).unwrap();
        assert_eq!("0702e803420704bf8002aec60d", hex::encode(&sink));
    }

    /// Round-trips a string through `put_str` / `get_str`.
    #[test]
    fn pack_varbinaries_and_string() {
        let mut sink = Vec::<u8>::new();
        sink.put_str("Hello, rupack!").unwrap();
        println!("size ${}\n{}", sink.len(), to_dump(&sink));
        let mut reader = SliceSource::from(&sink);
        assert_eq!("Hello, rupack!", reader.get_str().unwrap());
    }

    /// Round-trips zero, plus/minus one and both extremes of `i64`, `i32` and `i16` through
    /// the `put_i*` / `get_i*` methods.
    #[test]
    fn test_signed() -> Result<()> {
        fn roundtrip_i64(value: i64) -> Result<()> {
            let mut buf = Vec::<u8>::new();
            buf.put_i64(value).unwrap();
            let mut reader = SliceSource::from(&buf);
            assert_eq!(value, reader.get_i64()?);
            Ok(())
        }
        fn roundtrip_i32(value: i32) -> Result<()> {
            let mut buf = Vec::<u8>::new();
            buf.put_i32(value).unwrap();
            let mut reader = SliceSource::from(&buf);
            assert_eq!(value, reader.get_i32()?);
            Ok(())
        }
        fn roundtrip_i16(value: i16) -> Result<()> {
            let mut buf = Vec::<u8>::new();
            buf.put_i16(value).unwrap();
            let mut reader = SliceSource::from(&buf);
            assert_eq!(value, reader.get_i16()?);
            Ok(())
        }

        for value in [0, 1, -1, i64::MAX, i64::MIN] {
            roundtrip_i64(value)?;
        }
        for value in [0, 1, -1, i32::MAX, i32::MIN] {
            roundtrip_i32(value)?;
        }
        for value in [0, 1, -1, i16::MAX, i16::MIN] {
            roundtrip_i16(value)?;
        }
        Ok(())
    }

    /// Checks the `to_dump` output for a 41-byte buffer holding the values `0..41`.
    #[test]
    fn test_dump() {
        for len in 0..64u8 {
            let bytes: Vec<u8> = (0..len).collect();
            // Only the 41-byte case has a pinned expected dump.
            if bytes.len() != 41 {
                continue;
            }
            let dump = to_dump(&bytes);
            assert_eq!(
                dump,
                concat!(
                    "0000 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f |................|\n",
                    "0010 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f |................|\n",
                    "0020 20 21 22 23 24 25 26 27 28                      | !\"#$%&'(       |\n",
                )
            );
        }
    }

    /// Round-trips zero and a set of positive/negative pairs through `put_signed` /
    /// `get_signed`.
    #[test]
    fn test_varsigned() -> Result<()> {
        fn roundtrip(value: i64) -> Result<()> {
            let mut buf = Vec::<u8>::new();
            buf.put_signed(value).unwrap();
            let mut reader = SliceSource::from(&buf);
            assert_eq!(value, reader.get_signed()?);
            Ok(())
        }

        roundtrip(0)?;
        // Each magnitude is checked with both signs, positive first.
        for magnitude in [1, 2, 64, 65, 127, 128, 255, 256, 2147483647, 2222147483647] {
            roundtrip(magnitude)?;
            roundtrip(-magnitude)?;
        }
        Ok(())
    }

    /// Packs a `u32` and a string with the `bipack!` macro and unpacks them with `bi_unpack`.
    #[test]
    fn test_packer() -> Result<()> {
        let a = 177u32;
        let b = "hello!";
        let packed = bipack!(a, b);
        println!("{}", to_dump(&packed));
        let mut reader = SliceSource::from(&packed);
        let number = u32::bi_unpack(&mut reader)?;
        let text = String::bi_unpack(&mut reader)?;
        assert_eq!(177u32, number);
        assert_eq!("hello!", text);
        Ok(())
    }
}