// hhh 1.0.1
//
// The hhh Binary File Processor
// Documentation
// hhh
// Copyright (c) 2023 by Stacy Prowell.  All rights reserved.
// https://gitlab.com/sprowell/hhh

//! Package byte data compactly for later use.

use std::io::Result as IoResult;
use std::io::Seek;
use std::io::Write;
use std::{fs::File, io::SeekFrom, path::PathBuf};

/// An accumulator for byte data that is placed at explicit offsets.
///
/// Bytes are appended to a *current* sequence that starts at the current offset.  Setting a
/// new offset with `set_offset` closes the current sequence and starts another one, so only
/// the bytes actually supplied are stored, never the gaps between them.  When the data is
/// unpacked with `get_bytes`, gaps are filled with zero bytes, and where sequences overlap
/// the sequence stored later wins.
///
/// ```rust
/// use hhh::byte_data::Bytes;
///
/// let mut bytes = Bytes::default();
/// bytes.add_bytes(&[0x48u8, 0x65u8, 0x6cu8, 0x6cu8, 0x6fu8, 0x43u8]);
/// bytes.set_offset(5);
/// bytes.add_bytes(&[0x2cu8, 0x20u8, 0x77u8, 0x6fu8, 0x72u8, 0x6cu8, 0x64u8, 0x21u8]);
/// bytes.set_offset(15);
/// bytes.add_bytes(&[0xffu8]);
/// assert_eq!(16, bytes.get_offset());
/// bytes.add_byte(0x40u8);
/// bytes.finalize();
/// let result = bytes.get_bytes();
/// assert_eq!(result, vec![
///     0x48u8, 0x65u8, 0x6cu8, 0x6cu8, 0x6fu8, 0x2cu8, 0x20u8, 0x77u8,
///     0x6fu8, 0x72u8, 0x6cu8, 0x64u8, 0x21u8, 0x00u8, 0x00u8, 0xffu8,
///     0x40u8]);
/// ```
///
#[derive(Debug, Clone, Default)]
pub struct Bytes {
    /// The data read.  This is stored as pairs consisting of an offset value and then the
    /// sequence of bytes to store at that offset.  Because subsequent entries can overwrite
    /// earlier entries, the order is significant.
    ///
    /// This does not contain the current byte sequence being processed.  To make sure it is
    /// not lost, invoke `finalize` when done.
    pub data: Vec<(u64, Vec<u8>)>,

    /// The offset value for the start of the current byte sequence.
    pub offset_value: u64,

    /// The current byte sequence, which starts at `offset_value` and has not yet been moved
    /// into `data`.
    pub byte_sequence: Vec<u8>,
}

impl Bytes {
    /// Make a new, empty byte data structure positioned at offset zero.
    ///
    /// This is equivalent to `Bytes::default()`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Finalize the current byte sequence.
    ///
    /// If the current sequence is non-empty it is moved into `data` together with its
    /// starting offset, and the current offset advances past it so that further bytes
    /// continue directly after it.  An empty current sequence is not stored.
    ///
    /// You should invoke this when done reading to be sure everything is properly stored.
    /// This can be called multiple times and does not prevent additional bytes from being
    /// added.
    pub fn finalize(&mut self) {
        if self.byte_sequence.is_empty() {
            return;
        }
        let start = self.offset_value;
        // Taking the sequence leaves an empty vector behind, ready for the next run of bytes.
        let sequence = std::mem::take(&mut self.byte_sequence);
        self.offset_value = start + (sequence.len() as u64);
        self.data.push((start, sequence));
    }

    /// Specify a new offset.
    ///
    /// The current byte sequence is finalized first, so bytes added afterwards start a new
    /// sequence at `offset`.
    pub fn set_offset(&mut self, offset: u64) {
        self.finalize();
        self.offset_value = offset;
    }

    /// Add bytes to the end of the current byte sequence.
    ///
    /// The bytes are copied; the caller's slice is left untouched.
    pub fn add_bytes(&mut self, bytes: &[u8]) {
        self.byte_sequence.extend_from_slice(bytes);
    }

    /// Add a single byte to the end of the current byte sequence.
    pub fn add_byte(&mut self, byte: u8) {
        self.byte_sequence.push(byte);
    }

    /// Get the offset value of the next byte to be added.
    ///
    /// This only reads the structure, so a shared reference is sufficient.
    pub fn get_offset(&self) -> u64 {
        self.offset_value + (self.byte_sequence.len() as u64)
    }

    /// Unpack this structure into the full byte array.  Caution: This can be very large.
    /// This implicitly calls `finalize`.
    ///
    /// The result starts at offset zero and extends to the furthest end of any stored
    /// sequence.  Positions no sequence covers are zero.  Sequences are applied in storage
    /// order, so a later sequence overwrites an earlier one where they overlap.
    ///
    /// # Panics
    ///
    /// Panics if the end of any stored sequence cannot be represented as a `usize`, that
    /// is, if the unpacked data could not be addressed in memory on this platform.
    pub fn get_bytes(&mut self) -> Vec<u8> {
        self.finalize();

        // First pass: find the total length, validating every segment's extent on the way,
        // so the buffer is allocated (and zero-filled) exactly once.
        let total = self
            .data
            .iter()
            .map(|(offset, bytes)| Self::segment_end(*offset, bytes.len()))
            .max()
            .unwrap_or(0);
        let mut all_bytes = vec![0u8; total];

        // Second pass: copy each segment into place in storage order.
        for (offset, bytes) in &self.data {
            // The cast is lossless: `segment_end` already proved the offset fits in `usize`.
            let start = *offset as usize;
            all_bytes[start..start + bytes.len()].copy_from_slice(bytes);
        }
        all_bytes
    }

    /// Write the bytes to a file.  This implicitly calls `finalize`.
    ///
    /// The file at `destination` is created, or truncated if it already exists.  Each stored
    /// sequence is then written at its own offset, in storage order, so a later sequence
    /// overwrites an earlier one where they overlap.  Gaps between sequences are never
    /// written explicitly; their content is whatever the file system supplies for unwritten
    /// regions (NOTE(review): typically zero bytes -- confirm for the target platforms).
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while creating the file, seeking, or writing.
    pub fn write_bytes(&mut self, destination: PathBuf) -> IoResult<()> {
        self.finalize();
        let mut file = File::create(destination)?;
        for (offset, bytes) in &self.data {
            file.seek(SeekFrom::Start(*offset))?;
            file.write_all(bytes)?;
        }
        Ok(())
    }

    /// Compute the in-memory index one past the last byte of a segment, panicking if the
    /// segment cannot be addressed on this platform.
    fn segment_end(offset: u64, len: usize) -> usize {
        // The fully qualified form keeps this independent of which prelude is in scope.
        let start = <usize as std::convert::TryFrom<u64>>::try_from(offset)
            .expect("byte sequence offset does not fit in addressable memory");
        start
            .checked_add(len)
            .expect("byte sequence end does not fit in addressable memory")
    }
}

#[cfg(test)]
mod test {

    use std::{fs::File, io::Read, path::PathBuf};

    use super::Bytes;

    /// A fresh structure is at offset zero and unpacks to nothing.
    #[test]
    fn trivial_test_1() {
        let mut bytes = Bytes::new();
        assert_eq!(bytes.get_offset(), 0);
        assert_eq!(bytes.get_bytes(), vec![]);
    }

    /// Finalizing an empty structure is harmless and bytes can still be added afterwards.
    #[test]
    fn trivial_test_2() {
        let mut bytes = Bytes::new();
        bytes.finalize();
        bytes.add_byte(0);
        assert_eq!(bytes.get_offset(), 1);
        assert_eq!(bytes.get_bytes(), vec![0u8]);
    }

    /// Gaps unpack as zero bytes and a later sequence overwrites an earlier one.
    #[test]
    fn overlap_test_1() {
        let mut bytes = Bytes::new();
        bytes.add_bytes(b"abcd");
        bytes.set_offset(2);
        bytes.add_bytes(b"XY");
        bytes.set_offset(6);
        bytes.add_byte(b'!');
        assert_eq!(bytes.get_offset(), 7);
        assert_eq!(bytes.get_bytes(), b"abXY\0\0!".to_vec());
    }

    /// Bytes written to a file can be read back unchanged.
    #[test]
    fn file_test_1() -> std::io::Result<()> {
        // Use the system temporary directory and a per-process name so the test neither
        // litters the working directory nor collides with a concurrent test run.
        let path: PathBuf =
            std::env::temp_dir().join(format!("hhh_byte_data_test_{}.bin", std::process::id()));

        let mut bytes = Bytes::new();
        bytes.add_bytes(b"Erase me!");
        bytes.finalize();
        bytes.write_bytes(path.clone())?;

        let mut content = vec![];
        let read_result = File::open(&path).and_then(|mut file| file.read_to_end(&mut content));

        // Remove the file before checking anything, so a failure does not leave it behind.
        std::fs::remove_file(&path)?;
        read_result?;

        assert_eq!("Erase me!".as_bytes(), content);
        Ok(())
    }
}