bravely 0.1.0

A utility to load data from Bravely Default and Bravely Second text file formats.
Documentation
#![feature(type_alias_impl_trait)]

/// This module contains various utility traits and functions
/// useful on `Reader` structs.
pub mod util {
    use bytes::Buf;
    use std::borrow::Cow;
    use std::io::{BufRead, Read};

    /// Extension trait offering helper methods that delegate to
    /// `bytes::Buf` so that primitive integer types can be pulled
    /// out of any reader.
    pub trait ReadPrimitive: Read {
        /// Reads exactly four bytes from `self` and decodes them as a
        /// signed 32 bit integer stored in little-endian order.
        ///
        /// # Errors
        ///
        /// Forwards whatever error `read_exact` reports while filling
        /// the four byte scratch buffer.
        fn read_i32_le(&mut self) -> Result<i32, std::io::Error> {
            let mut raw = [0_u8; 4];
            self.read_exact(&mut raw)?;

            // `Buf` is implemented for byte slices, so we decode through a
            // slice view over the scratch buffer
            let mut view: &[u8] = &raw;
            Ok(view.get_i32_le())
        }
    }

    /// Every type that implements `Read` gets the helpers for free.
    impl<R: Read> ReadPrimitive for R {}

    /// A trait that enables reading UTF-8 strings from any `BufRead`
    /// until a given delimiter byte is found.
    pub trait ReadStringUntil: BufRead {
        /// Reads bytes from `self` into `buf` until the `delim` byte is found
        /// or the end of the stream is reached, then returns the contents of
        /// `buf` as a string, **not** including the `delim` read by this call.
        ///
        /// Lossy conversion into UTF-8 is performed.
        ///
        /// Previous contents of `buf` are not cleared; they are part of the
        /// returned string.
        ///
        /// If the stream ends before `delim` is found, everything that was
        /// read is returned untouched. In particular, reading from an
        /// exhausted stream into an empty `buf` yields an empty string.
        ///
        /// # Errors
        ///
        /// Forwards any I/O error reported by `read_until`.
        fn read_string_until<'buf>(
            &mut self,
            delim: u8,
            buf: &'buf mut Vec<u8>,
        ) -> Result<Cow<'buf, str>, std::io::Error> {
            let read = self.read_until(delim, buf)?;

            // `read_until` only appends the delimiter when it actually found
            // one. We must not strip a byte when the stream simply ended, nor
            // when nothing was read and `buf` merely held older data.
            let end = if read > 0 && buf.last() == Some(&delim) {
                buf.len() - 1
            } else {
                buf.len()
            };

            Ok(String::from_utf8_lossy(&buf[..end]))
        }
    }

    impl<R: BufRead> ReadStringUntil for R {}
}

/// The archive module exposes structs that allow reading from
/// Bravely Default & Second `index.fs` and `crowd.fs` files.
/// Various `Reader`s which also implement `Iterator` are provided
/// for ergonomic reading of data.
pub mod archive {
    use crate::util::{ReadPrimitive, ReadStringUntil};

    use crate::data::BtbData;
    use bytes::Buf;
    use std::io::{BufRead, Read, Seek, SeekFrom};

    /// A single entry decoded from an `index.fs` file.
    ///
    /// Instances are produced by `IndexDataReader`; the fields are
    /// read-only and exposed through getters.
    #[derive(Debug)]
    pub struct IndexData {
        // Offset value stored in the entry; `CrowdDataReader` seeks to it
        data_ptr: i32,
        // Length value stored in the entry; `CrowdDataReader` reads that many bytes
        data_len: i32,
        // Name stored in the entry, lossily converted to UTF-8
        filename: String,
    }

    impl IndexData {
        /// Returns the data offset stored in this entry.
        pub fn data_ptr(&self) -> i32 {
            let Self { data_ptr, .. } = self;
            *data_ptr
        }

        /// Returns the data length stored in this entry.
        pub fn data_len(&self) -> i32 {
            let Self { data_len, .. } = self;
            *data_len
        }

        /// Returns the file name stored in this entry.
        pub fn filename(&self) -> &str {
            self.filename.as_str()
        }
    }

    /// Reader over an `index.fs` file. It implements `Iterator` and yields
    /// one `IndexData` per entry, so callers do not have to walk the file
    /// by hand.
    pub struct IndexDataReader<R: BufRead + Seek> {
        // Source the entries are decoded from
        reader: R,
        // Scratch buffer reused for every file name
        buf: Vec<u8>,
        // Location of the following entry; `None` before the first read
        // and once iteration has finished
        next_ptr: Option<u64>,
    }

    impl<R> IndexDataReader<R>
    where
        R: BufRead + Seek,
    {
        /// Creates an `IndexDataReader` on top of `reader`.
        ///
        /// The struct is meant to be used with a file, which is why
        /// `reader` has to implement both `BufRead` and `Seek`.
        pub fn new(reader: R) -> Self {
            let buf = Vec::new();
            let next_ptr = None;

            Self {
                reader,
                buf,
                next_ptr,
            }
        }

        /// Consumes this `IndexDataReader` and turns it into an iterator over
        /// the matching `crowd.fs` file, which is read through `reader`.
        pub fn extract_crowd<C>(self, reader: C) -> CrowdDataReader<C, Self>
        where
            C: BufRead + Seek,
        {
            CrowdDataReader::new(reader, self)
        }
    }

    // IDEA: yield `Result`s rather than bare `IndexData` so errors can propagate.
    // Then again, if reading from a file fails there is little a caller could do
    // to recover, and a byte slice as the source cannot fail at all.
    impl<R: BufRead + Seek> Iterator for IndexDataReader<R> {
        type Item = IndexData;

        /// Decodes and returns the next entry of the `index.fs` file.
        ///
        /// Returns `None` once the last entry has been yielded, when the
        /// stored "next" pointer is negative, or when reading the file name
        /// fails.
        ///
        /// # Panics
        ///
        /// Panics if seeking or reading any of the integer fields fails.
        fn next(&mut self) -> Option<Self::Item> {
            // Start from an empty scratch buffer so that only the new name ends up in it
            self.buf.clear();

            // Position the reader on the entry that is about to be decoded
            match self.next_ptr {
                // `None` only occurs at the start and at the end of iteration:
                // the entry is read from wherever the reader currently is
                None => {}
                // A pointer of 0x00 marks the entry we yielded last as the final one
                Some(0x00) => {
                    self.next_ptr = None;
                    return None;
                }
                // Any other value is the location of the entry inside the file
                Some(offset) => {
                    self.reader
                        .seek(SeekFrom::Start(offset))
                        .expect("Should not exceed reader bounds");
                }
            }

            // Every entry begins with the pointer to the entry that follows it;
            // a negative value cannot be converted and ends iteration
            self.next_ptr = self
                .reader
                .read_i32_le()
                .expect("Next index should be valid")
                .try_into()
                .ok();
            self.next_ptr?;

            let data_ptr = self
                .reader
                .read_i32_le()
                .expect("Should not exceed reader bounds");
            let data_len = self
                .reader
                .read_i32_le()
                .expect("Should not exceed reader bounds");

            // The next 4 bytes are not used and are skipped
            self.reader.seek(SeekFrom::Current(4)).unwrap();

            // A failure while reading the name stops iteration instead of panicking
            let filename = self
                .reader
                .read_string_until(0x00, &mut self.buf)
                .ok()?
                .into_owned(); // with Generic Associated Types we could hand out the Cow itself

            Some(IndexData {
                data_ptr,
                data_len,
                filename,
            })
        }
    }

    /// Struct that holds data from a single `crowd.fs` entry.
    ///
    /// `data` may contain arbitrary file contents.
    pub struct CrowdData {
        filename: String,
        data: Vec<u8>,
    }

    impl CrowdData {
        /// Private constructor for `CrowdData` entries.
        ///
        /// Should only be called by the `CrowdDataReader` iterator.
        fn new(filename: String, data: Vec<u8>) -> Self {
            Self { filename, data }
        }

        pub fn filename(&self) -> &str {
            &self.filename
        }

        pub fn data(&self) -> &[u8] {
            &self.data
        }

        /// Function that can decompress the contents of any `crowd.fs` file.
        ///
        /// `compressed_crowd` is expected to begin with 0x60 as a magic number,
        /// indicating that it is a DEFLATE-compressed `crowd.fs` entry.
        fn decompress_crowd(compressed_crowd: &[u8]) -> Vec<u8> {
            use flate2::bufread::DeflateDecoder;
            assert_eq!(compressed_crowd[0], 0x60);

            let decompressed_size = ((&compressed_crowd[..4]).get_i32_le() as usize) >> 8;
            let mut buf = Vec::with_capacity(decompressed_size);

            let mut d = DeflateDecoder::new(&compressed_crowd[4..]);
            d.read_to_end(&mut buf).unwrap();

            buf
        }

        pub fn try_into_btb(self) -> Option<BtbData> {
            BtbData::new(self.filename, self.data)
        }
    }

    /// Crowd reader that implements `Iterator` and yields `CrowdData` structs,
    /// simplifying the code necessary to read in data from a `crowd.fs` file.
    ///
    /// Due to the Bravely Default and Bravely Second file structure,
    /// this struct wraps an iterator that yields `IndexData` structs,
    /// such as `IndexDataReader`, which is then used by this iterator
    /// to know where to fetch the entries in its `crowd.fs` file.
    pub struct CrowdDataReader<R: BufRead + Seek, I: Iterator<Item = IndexData>> {
        // Source of the `crowd.fs` contents
        reader: R,
        // Provides the location, length and name of every entry
        index_iter: I,
        // Scratch buffer holding the compressed bytes of the current entry
        buf: Vec<u8>,
    }

    impl<R: BufRead + Seek, I: Iterator<Item = IndexData>> CrowdDataReader<R, I> {
        /// Private constructor; `reader` provides the `crowd.fs` contents
        /// and `index_iter` the entries describing where they are located.
        fn new(reader: R, index_iter: I) -> Self {
            Self {
                reader,
                index_iter,
                buf: Vec::new(),
            }
        }
    }

    impl<R: BufRead + Seek, I: Iterator<Item = IndexData>> Iterator for CrowdDataReader<R, I> {
        type Item = CrowdData;

        /// Reads and decompresses the entry described by the next item of
        /// the wrapped index iterator. Returns `None` once that iterator
        /// is exhausted.
        ///
        /// # Panics
        ///
        /// Panics if the index entry holds a negative offset or length, if
        /// seeking or reading fails, or if the entry cannot be decompressed.
        /// These errors are considered unrecoverable.
        fn next(&mut self) -> Option<Self::Item> {
            // We extract the next entry from the `index.fs` iterator, then
            // convert its `data_len` and `data_ptr` fields into unsigned types.
            // Both conversions are checked: a plain `as` cast would silently
            // sign-extend a negative offset into a bogus seek position.
            let index = self.index_iter.next()?;
            let next_len: usize = index
                .data_len()
                .try_into()
                .expect("Entry length should not be negative");
            let next_ptr: u64 = index
                .data_ptr()
                .try_into()
                .expect("Entry offset should not be negative");

            self.buf.resize(next_len, 0);

            // We move to the next entry's location and then read its contents
            self.reader
                .seek(SeekFrom::Start(next_ptr))
                .expect("Should not exceed reader bounds");
            self.reader
                .read_exact(&mut self.buf)
                .expect("Should not exceed reader bounds");

            // Finally, we decompress them and yield them to our caller
            Some(CrowdData::new(
                index.filename,
                CrowdData::decompress_crowd(&self.buf),
            ))
        }
    }
}

/// This module exposes structs and functions to parse the data
/// contained inside extracted `crowd.fs` files.
///
/// At the moment, the primary focus is the text data contained
/// within `.btb` files.
pub mod data {
    use bytes::Buf;
    use std::iter::IntoIterator;
    use std::ops::Range;

    /// A single entry of a `.btb` file.
    ///
    /// Entries can be built by hand through `new`, or obtained from a
    /// `BtbData` struct by iterating over it (`IntoIterator`).
    pub struct BtbEntry {
        // Fixed-size binary part of the entry (maybe wrap in a new type?)
        bin: [u8; 20],
        // Command string of the entry
        cmd: String,
        // Text string of the entry
        text: String,
    }

    impl BtbEntry {
        /// Builds an entry out of its three parts.
        pub fn new(bin: [u8; 20], cmd: String, text: String) -> Self {
            BtbEntry { bin, cmd, text }
        }

        /// Returns the binary part of the entry.
        pub fn bin(&self) -> &[u8; 20] {
            let BtbEntry { bin, .. } = self;
            bin
        }

        /// Returns the command string of the entry.
        pub fn cmd(&self) -> &str {
            self.cmd.as_str()
        }

        /// Returns the text string of the entry.
        pub fn text(&self) -> &str {
            self.text.as_str()
        }
    }

    /// Struct that holds the data for a `.btb` file in memory.
    pub struct BtbData {
        filename: String,
        data: Vec<u8>,

        size: u32, // perhaps redundant as it is the size of `data`

        // ptr + len pairs stored as ranges for convenience's sake;
        // using self-referential slices into `data` would be ideal
        // but it is unsafe
        bin_range: Range<usize>,
        cmd_range: Range<usize>,
        text_range: Range<usize>,

        stride: u32, // supposedly the no. of bytes per entry
        num_entries: u32, // could be used as a size hint for an iterator
    }

    impl BtbData {
        /// Constructor for `BtbData`. Returns an Option as this method
        /// performs basic checks for the `data` Vec to contain a valid
        /// `.btb` header.
        ///
        /// `None` is returned when:
        /// - `data` is too short to hold the complete header;
        /// - `data` does not begin with the `BTBF` magic string;
        /// - the size stored in the header differs from `data.len()`;
        /// - any of the three ptr + len pairs overflows or points
        ///   outside of `data`.
        pub fn new(filename: String, data: Vec<u8>) -> Option<Self> {
            // magic + size + 3 * (ptr, len) + stride + num_entries,
            // every field being 4 bytes wide
            const HEADER_LEN: usize = 4 + 4 + 3 * (4 + 4) + 4 + 4;

            // Some files, such as `.fscache`, have no contents;
            // we'd have no data to extract, thus we return None.
            // Checking for the whole header up front also guarantees that
            // none of the `bytes` reads below can run out of data and panic.
            if data.len() < HEADER_LEN {
                return None;
            }

            // We take the data we were given as a slice to be able
            // to use `bytes` methods on it to read its contents
            let mut bytes = data.as_slice();

            // Every `.btb` file begins with the ASCII-encoded string "BTBF"
            let magic = &bytes[..4];
            bytes.advance(4);
            if magic != b"BTBF" {
                return None;
            }

            // Length of the `data` Vec should match the metadata of its header
            let size = bytes.get_u32_le();
            let declared_len: usize = size.try_into().ok()?;
            if declared_len != data.len() {
                return None;
            }

            // Reads one ptr + len pair and turns it into a range. The range is
            // rejected if it overflows or exceeds `data`, so that slicing
            // `data` with it later on cannot panic.
            // Note that a `usize` narrower than 32 bits makes the conversions fail.
            let total_len = data.len();
            let mut next_range = || -> Option<Range<usize>> {
                let ptr: usize = bytes.get_u32_le().try_into().ok()?;
                let len: usize = bytes.get_u32_le().try_into().ok()?;

                let end = ptr.checked_add(len)?;
                if end > total_len {
                    return None;
                }

                Some(ptr..end)
            };

            let bin_range = next_range()?;
            let cmd_range = next_range()?;
            let text_range = next_range()?;

            let stride = bytes.get_u32_le();
            let num_entries = bytes.get_u32_le();

            Some(Self {
                filename,
                data,
                size,
                bin_range,
                cmd_range,
                text_range,
                stride,
                num_entries,
            })
        }

        /// Returns the name of the file this data was loaded from.
        pub fn filename(&self) -> &str {
            &self.filename
        }

        /// Returns the raw contents of the file, header included.
        pub fn data(&self) -> &[u8] {
            &self.data
        }

        /// Returns the size stored in the header.
        pub fn size(&self) -> u32 {
            self.size
        }

        /// Returns the stride stored in the header.
        pub fn stride(&self) -> u32 {
            self.stride
        }

        /// Returns the number of entries stored in the header.
        pub fn num_entries(&self) -> u32 {
            self.num_entries
        }
    }

    impl IntoIterator for BtbData {
        type Item = BtbEntry;
        type IntoIter = impl Iterator<Item = BtbEntry>;

        /// Turns the `.btb` data into an iterator over its entries.
        ///
        /// The iterator type is opaque because it is assembled from several
        /// iterator adapters.
        ///
        /// Note that every yielded entry owns a copy of the bytes and strings
        /// it was built from, for convenience's sake. This might change in
        /// the future.
        fn into_iter(self) -> Self::IntoIter {
            // A `.btb` file is made of three separate streams (binary data,
            // command data and text data), and every entry takes one item
            // from each of them. The streams are not contiguous in memory, so
            // each one is decoded from its own slice of `data` and the three
            // results are zipped together at the end.

            // Binary stream: fixed-size records of 20B each
            let bin_entries: Vec<[u8; 20]> = self.data[self.bin_range]
                .chunks_exact(20)
                .map(|chunk| {
                    let record: [u8; 20] = chunk.try_into().unwrap();
                    record
                })
                .collect();

            // Command stream: UTF-8 encoded, NULL-terminated strings,
            // which we separate at every 0x00 byte
            let cmd_text = String::from_utf8_lossy(&self.data[self.cmd_range]);
            let cmd_entries: Vec<String> = cmd_text.split('\0').map(String::from).collect();

            // Text stream: UTF-16 encoded, NULL-terminated strings.
            // Byte pairs are first assembled into little-endian `u16`s,
            // then decoded and separated just like the commands.
            // (A UTF-16 crate could spare us the intermediate copies.)
            let code_units: Vec<u16> = self.data[self.text_range]
                .chunks_exact(2)
                .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
                .collect();
            let decoded_text = String::from_utf16_lossy(&code_units);
            let text_entries: Vec<String> =
                decoded_text.split('\0').map(String::from).collect();

            // Zip the three streams and build a `BtbEntry` out of every triple
            bin_entries
                .into_iter()
                .zip(cmd_entries)
                .zip(text_entries)
                .map(|((bin, cmd), text)| BtbEntry::new(bin, cmd, text))
        }
    }
}