// bitbottle 0.10.0
//
// a modern archive file format
// Documentation
use std::cmp::min;
use std::io::{Error, ErrorKind, Read, Result, Write};

// Smallest `block_bits` accepted by `FramingWriter::new`; the block size is
// `2 ** block_bits`, so blocks are at least 64 bytes.
const MIN_BLOCK_BITS: usize = 6;
// Exclusive upper bound on `block_bits` for `FramingWriter::new`: the value
// must be strictly less than this, keeping blocks smaller than 16MB.
const MAX_BLOCK_BITS: usize = 24;


/// Collects the bytes written to it and emits them to an output as bitbottle
/// frames of a preferred block size.
///
/// Every frame starts with one header byte laid out as `yyxxxxxx`; the frame
/// then carries `x << (y * 6)` bytes, where x is 0 - 63 and y is 0 - 3. A
/// header with y == 0 marks the last frame. Headers with x == 0 and y > 0
/// are reserved.
pub struct FramingWriter<W: Write> {
    // where finished frames are written
    output: W,
    // one header byte followed by room for exactly one full block
    buffer: Vec<u8>,
    // bytes of `buffer` in use, counting the header byte (1 means "no data")
    buffer_used: usize,
}

impl<W: Write> FramingWriter<W> {
    /// Build a framing writer that sends its frames to `output`, preferring
    /// blocks of `2 ** block_bits` bytes.
    ///
    /// # Panics
    ///
    /// Panics if `block_bits` is below `MIN_BLOCK_BITS`, or is not strictly
    /// below `MAX_BLOCK_BITS`.
    pub fn new(output: W, block_bits: usize) -> FramingWriter<W> {
        assert!(block_bits >= MIN_BLOCK_BITS, "Try harder, Homer");
        assert!(block_bits < MAX_BLOCK_BITS, "Frames must be smaller than 16MB");

        // the header for a full block never changes, so compute it once:
        // y selects the scale, x is the power of two left over.
        let scale = (block_bits / 6) as u8;
        let span = 1u8 << (block_bits % 6);

        // header byte first, then a whole block of payload space
        let block_size = 1usize << block_bits;
        let mut buffer = vec![0u8; block_size + 1];
        buffer[0] = (scale << 6) | span;

        Self { output, buffer, buffer_used: 1 }
    }

    /// Finish the stream and hand back the output.
    ///
    /// Whatever is still buffered is written as a run of frames, each sized
    /// by `next_frame_size`. If none of those frames was a final (y == 0)
    /// frame, an empty final frame is appended.
    pub fn close(mut self) -> Result<W> {
        // payload starts right after the header slot at index 0
        let mut cursor = 1;
        let mut terminated = false;

        while cursor < self.buffer_used {
            let (header, frame_len) = next_frame_size(self.buffer_used - cursor);
            // anything shorter than 64 bytes is a y == 0 frame, which ends the stream
            terminated |= frame_len < 64;

            // reuse the byte just before the payload as this frame's header;
            // it is either the original header slot or payload already sent.
            let frame_start = cursor - 1;
            let frame_end = cursor + frame_len;
            self.buffer[frame_start] = header;
            self.output.write_all(&self.buffer[frame_start..frame_end])?;
            cursor = frame_end;
        }

        if !terminated {
            self.output.write_all(&[0u8])?;
        }
        Ok(self.output)
    }
}

impl<W: Write> Write for FramingWriter<W> {
    /// Copy as much of `data` as fits into the current block, and emit the
    /// block (header included) to the output as soon as it is full.
    ///
    /// Returns how many bytes of `data` were accepted, which may be fewer
    /// than `data.len()`.
    fn write(&mut self, data: &[u8]) -> Result<usize> {
        let free = self.buffer.len() - self.buffer_used;
        let take = min(free, data.len());
        let end = self.buffer_used + take;

        self.buffer[self.buffer_used..end].copy_from_slice(&data[..take]);
        self.buffer_used = end;

        if end == self.buffer.len() {
            // the block is complete: ship it and start over after the header
            self.output.write_all(&self.buffer)?;
            self.buffer_used = 1;
        }
        Ok(take)
    }

    /// Does nothing: buffered data only leaves when a block fills up or
    /// when the writer is closed.
    fn flush(&mut self) -> Result<()> {
        Ok(())
    }
}


// given a size, return a frame header for the closest available size that
// does not exceed it, and what size was chosen.
//
// The largest scale whose unit (256K/4K/64/1 bytes) fits in `size` is used,
// with as many units as fit, up to the 63 that the 6-bit span field of the
// header can hold. A `size` of 0 yields `(0, 0)`, the empty final frame.
fn next_frame_size(size: usize) -> (u8, usize) {
    for y in [3usize, 2, 1, 0] {
        let scale = y * 6;
        if size >= (1 << scale) {
            // 256K/4K/64/1 frame. Only the top scale can overflow the span
            // field (sizes of 16MB and up); clamp so the span never spills
            // into the scale bits or wraps around to a reserved header.
            let span = (size >> scale).min(0x3f);
            return (((y as u8) << 6) | span as u8, span << scale);
        }
    }
    (0, 0)
}


/// Read a bitbottle framed stream and generate the raw data.
///
/// Frame headers are consumed and validated as they are encountered; only
/// the frame payloads are handed to the caller. Once the final frame (a
/// header with y == 0) has been read completely, further reads return
/// `Ok(0)`.
pub struct UnframingReader<R: Read> {
    inner: R,

    // current frame: total payload size, and how much of it was consumed
    frame_size: usize,
    frame_read: usize,

    // have we read a frame header with y = 0?
    last_frame: bool,
}

impl<R: Read> UnframingReader<R> {
    /// Wrap `reader`, which must be positioned at a frame header.
    pub fn new(reader: R) -> UnframingReader<R> {
        UnframingReader {
            inner: reader,
            frame_size: 0,
            frame_read: 0,
            last_frame: false,
        }
    }

    /// Give back the wrapped reader, wherever it currently is in the stream.
    pub fn close(self) -> R {
        self.inner
    }
}

impl<R: Read> Read for UnframingReader<R> {
    /// Fill `buffer` with payload bytes, crossing frame boundaries as
    /// needed, and return how many bytes were stored. The count is smaller
    /// than `buffer.len()` only when the final frame ran out.
    ///
    /// # Errors
    ///
    /// - `ErrorKind::InvalidData` if a reserved header (x == 0, y > 0) is
    ///   found.
    /// - `ErrorKind::UnexpectedEof` if the inner reader ends before the
    ///   final frame is complete, whether at a header or inside a payload.
    /// - Any error reported by the inner reader.
    ///
    /// When an error is returned, bytes already copied into `buffer` during
    /// the same call are not reported.
    fn read(&mut self, buffer: &mut [u8]) -> Result<usize> {
        if buffer.is_empty() {
            return Ok(0);
        }

        let mut index = 0;
        while index < buffer.len() {
            if self.frame_read == self.frame_size {
                if self.last_frame {
                    // that was it.
                    return Ok(index);
                }
                // read more, if we can.
                let mut header = [0u8; 1];
                self.inner.read_exact(&mut header)?;
                let span = (header[0] & 0x3f) as usize;
                let scale = 6 * ((header[0] >> 6) as usize);
                if span == 0 && scale > 0 {
                    return Err(Error::new(ErrorKind::InvalidData, "invalid frame header"));
                }
                self.frame_size = span << scale;
                self.frame_read = 0;
                if scale == 0 {
                    self.last_frame = true;
                }
            }

            let size = min(self.frame_size - self.frame_read, buffer.len() - index);
            if size > 0 {
                let n = self.inner.read(&mut buffer[index..index + size])?;
                if n == 0 {
                    // the source ended in the middle of a frame. Without this
                    // check neither counter advances and the loop never ends.
                    return Err(Error::new(
                        ErrorKind::UnexpectedEof,
                        "unexpected end of stream inside a frame",
                    ));
                }
                self.frame_read += n;
                index += n;
            }
        }
        Ok(index)
    }
}


// ----- tests

#[cfg(test)]
mod test {
    use hex::encode;
    use std::collections::VecDeque;
    use std::cmp::min;
    use std::io::{Cursor, Read, Result, Write};
    use super::{FramingWriter, UnframingReader};

    // where the four frame headers land when 0x7eedd bytes are framed in one
    // go, and the header byte expected at each spot (spans 1, 3e, 3b, 1d).
    const ODD_HEADERS: [(usize, u8); 4] = [
        (0, 0xc0 + 0x01),
        (1 + 0x4_0000, 0x80 + 0x3e),
        (2 + 0x7_e000, 0x40 + 0x3b),
        (3 + 0x7_eec0, 0x1d),
    ];

    // frame `data` with the given block size and return the framed bytes.
    fn frame_writer(data: &[u8], block_bits: usize) -> Vec<u8> {
        let mut framed = Vec::new();
        {
            let mut writer = FramingWriter::new(&mut framed, block_bits);
            writer.write_all(data).unwrap();
            writer.close().unwrap();
        }
        framed
    }

    // unframe everything `reader` has to offer (or as much as fits) into
    // `buffer`, returning the part of `buffer` that was filled.
    fn unframe_reader<'a>(buffer: &'a mut [u8], reader: &mut impl Read) -> &'a [u8] {
        let mut unframer = UnframingReader::new(reader);
        let mut filled = 0;
        while filled < buffer.len() {
            match unframer.read(&mut buffer[filled..]).unwrap() {
                0 => break,
                n => filled += n,
            }
        }
        &buffer[..filled]
    }

    // same as `unframe_reader`, reading the framed bytes from a slice.
    fn unframe<'a>(buffer: &'a mut [u8], data: &[u8]) -> &'a [u8] {
        let mut cursor = Cursor::new(data);
        unframe_reader(buffer, &mut cursor)
    }

    // a reader that never returns more than one queued chunk per call.
    struct SlowReader {
        buffers: VecDeque<Vec<u8>>,
    }

    impl Read for SlowReader {
        fn read(&mut self, buffer: &mut [u8]) -> Result<usize> {
            let chunk = match self.buffers.pop_front() {
                Some(chunk) => chunk,
                None => return Ok(0),
            };
            let count = min(chunk.len(), buffer.len());
            let (head, rest) = chunk.split_at(count);
            buffer[..count].copy_from_slice(head);
            if !rest.is_empty() {
                // the caller could not take it all: queue the leftovers first
                self.buffers.push_front(rest.to_vec());
            }
            Ok(count)
        }
    }


    #[test]
    fn small_frame() {
        let framed = frame_writer(&[1, 2, 3], 20);
        assert_eq!(encode(framed), "03010203");
    }

    #[test]
    fn frame_perfect_block_size() {
        let empty: [u8; 0] = [];
        assert_eq!(encode(frame_writer(&empty, 20)), "00");

        let block = [0u8; 64];
        let expect = ["41", encode(block).as_str(), "00"].concat();
        assert_eq!(encode(frame_writer(&block, 20)), expect);
    }

    #[test]
    fn frame_slow_data_writer() {
        let chunks = [
            vec![ 1u8, 2u8 ],
            vec![ 3u8, 4u8 ],
            vec![ 5u8, 6u8, 7u8, 8u8, 9u8 ],
            vec![ 10u8, 11u8, 12u8 ],
        ];
        let mut framed = Vec::new();
        {
            let mut writer = FramingWriter::new(&mut framed, 20);
            for chunk in &chunks {
                writer.write_all(chunk).unwrap();
            }
            writer.close().unwrap();
        }
        assert_eq!(encode(framed), "0c0102030405060708090a0b0c");
    }

    #[test]
    fn frame_splits_block() {
        let framed = frame_writer(&[0u8; 70], 6);
        let expect = [
            "41",
            "0000000000000000000000000000000000000000000000000000000000000000",
            "0000000000000000000000000000000000000000000000000000000000000000",
            "06000000000000",
        ].concat();
        assert_eq!(encode(framed), expect);
    }

    #[test]
    fn frame_odd_block() {
        let payload = vec![0u8; 0x7eedd];
        let framed = frame_writer(&payload, 20);

        assert_eq!(framed.len(), 0x7eedd + 4);
        for &(pos, header) in ODD_HEADERS.iter() {
            assert_eq!(framed[pos], header);
        }
        // everything that is not a header must be payload
        for (i, byte) in framed.iter().enumerate().take(0x7eedd + 4) {
            if ODD_HEADERS.iter().all(|&(pos, _)| pos != i) {
                assert_eq!(*byte, 0);
            }
        }
    }

    #[test]
    fn frame_odd_block_writer() {
        let payload = vec![0u8; 0x7eedd];
        let framed = frame_writer(&payload, 20);

        assert_eq!(framed.len(), 0x7eedd + 4);
        for &(pos, header) in ODD_HEADERS.iter() {
            assert_eq!(framed[pos], header);
        }
        // everything that is not a header must be payload
        for (i, byte) in framed.iter().enumerate().take(0x7eedd + 3) {
            if ODD_HEADERS.iter().all(|&(pos, _)| pos != i) {
                assert_eq!(*byte, 0);
            }
        }
    }

    #[test]
    fn multi_split() {
        let payload = vec![0xeeu8; 0x409];
        let framed = frame_writer(&payload, 8);

        assert_eq!(framed.len(), 0x409 + 5);
        for (i, byte) in framed.iter().enumerate().take(0x40e) {
            let expect = match i {
                // four full 256-byte blocks...
                0 | 0x101 | 0x202 | 0x303 => 0x40 + 0x04,
                // ...then the 9 leftover bytes in a final frame
                0x404 => 0x9,
                _ => 0xee,
            };
            assert_eq!(*byte, expect);
        }
    }

    #[test]
    fn multi_split_writer() {
        let payload = vec![0xeeu8; 0x409];
        let framed = frame_writer(&payload, 8);

        assert_eq!(framed.len(), 0x409 + 5);
        for (i, byte) in framed.iter().enumerate().take(0x40e) {
            let expect = match i {
                // four full 256-byte blocks...
                0 | 0x101 | 0x202 | 0x303 => 0x40 + 0x04,
                // ...then the 9 leftover bytes in a final frame
                0x404 => 0x9,
                _ => 0xee,
            };
            assert_eq!(*byte, expect);
        }
    }

    #[test]
    fn small_unframe() {
        let framed: &[u8] = &[ 3, 1, 2, 3 ];
        let mut scratch = [0u8; 16];
        let raw = unframe(&mut scratch, framed);
        assert_eq!(encode(raw), "010203");
    }

    #[test]
    fn unframe_slow_data() {
        let mut reader = SlowReader {
            buffers: VecDeque::from(vec![
                vec![ 0x0cu8, 1u8, 2u8 ],
                vec![ 3u8, 4u8 ],
                vec![ 5u8, 6u8, 7u8, 8u8, 9u8 ],
                vec![ 10u8, 11u8, 12u8 ],
            ]),
        };
        let mut scratch = [0u8; 16];
        let raw = unframe_reader(&mut scratch, &mut reader);
        assert_eq!(encode(raw), "0102030405060708090a0b0c");
    }

    #[test]
    fn unframe_multiple_blocks() {
        // a 64-byte frame followed by a final 4-byte frame
        let mut framed = [0u8; 70];
        framed[0] = 0x41;
        framed[64] = 0xff;
        framed[65] = 4;
        framed[69] = 0xee;

        // the same payload with both header bytes removed
        let mut expect = [0u8; 68];
        expect[63] = 0xff;
        expect[67] = 0xee;

        let mut scratch = [0u8; 128];
        assert_eq!(unframe(&mut scratch, &framed), expect);
    }

    #[test]
    fn unframe_odd_blocks() {
        let expect = [255u8; 0x7eedd];
        let mut framed = [255u8; 0x7eedd + 4];
        for &(pos, header) in ODD_HEADERS.iter() {
            framed[pos] = header;
        }
        let mut scratch = [0u8; 0x80000];
        assert_eq!(unframe(&mut scratch, &framed), expect);
    }
}