#![feature(type_alias_impl_trait)]
/// This module contains various utility traits and functions
/// useful on `Reader` structs.
pub mod util {
use bytes::Buf;
use std::borrow::Cow;
use std::io::{BufRead, Read};
/// Extension trait for [`Read`] that offers helpers for pulling primitive
/// integer types out of any reader.
///
/// The byte-order decoding itself is delegated to `bytes::Buf`.
pub trait ReadPrimitive: Read {
    /// Reads exactly four bytes from `self` and decodes them as a
    /// little-endian signed 32 bit integer.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `read_exact`.
    fn read_i32_le(&mut self) -> Result<i32, std::io::Error> {
        let mut raw = [0_u8; 4];
        self.read_exact(&mut raw)?;
        // `Buf` is implemented for `&[u8]`; it needs a mutable binding
        // because decoding advances the slice.
        let mut cursor: &[u8] = &raw;
        Ok(cursor.get_i32_le())
    }
}
// Blanket implementation: every `Read` type gets the helpers above.
impl<R> ReadPrimitive for R where R: Read {}
/// A trait that enables reading UTF-8 strings from any `BufRead`
/// until a given delimiter byte is found.
pub trait ReadStringUntil: BufRead {
    /// Reads bytes from `self` into `buf` until the `delim` byte is found or
    /// the end of the stream is reached, then returns the contents of `buf`
    /// as a string.
    ///
    /// The returned string does **not** include the trailing `delim`; the
    /// delimiter byte itself is still appended to `buf`. If the stream ends
    /// before `delim` is seen, everything that was read is returned untouched.
    ///
    /// Lossy conversion into UTF-8 is performed.
    ///
    /// Previous contents of `buf` are not cleared, and they are part of the
    /// returned string.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `read_until`.
    fn read_string_until<'buf>(
        &mut self,
        delim: u8,
        buf: &'buf mut Vec<u8>,
    ) -> Result<Cow<'buf, str>, std::io::Error> {
        let read = self.read_until(delim, buf)?;
        // Only strip the last byte when it really is the delimiter we just
        // read: at end of stream there may be none, and `buf` may be empty.
        let end = if read > 0 && buf.last() == Some(&delim) {
            buf.len() - 1
        } else {
            buf.len()
        };
        Ok(String::from_utf8_lossy(&buf[..end]))
    }
}
impl<R: BufRead> ReadStringUntil for R {}
}
/// The archive module exposes structs that allow reading from
/// Bravely Default & Second `index.fs` and `crowd.fs` files.
/// Various `Reader`s which also implement `Iterator` are provided
/// for ergonomic reading of data.
pub mod archive {
use crate::util::{ReadPrimitive, ReadStringUntil};
use crate::data::BtbData;
use bytes::Buf;
use std::io::{BufRead, Read, Seek, SeekFrom};
/// One record read from an `index.fs` file.
///
/// `CrowdDataReader` uses `data_ptr` as the offset to seek to in its
/// `crowd.fs` reader and `data_len` as the number of bytes to read there.
#[derive(Debug)]
pub struct IndexData {
    // Offset of the entry's contents.
    data_ptr: i32,
    // Length in bytes of the entry's contents.
    data_len: i32,
    // Name of the file this entry describes.
    filename: String,
}
impl IndexData {
    /// Returns the stored data pointer of this entry.
    pub fn data_ptr(&self) -> i32 {
        let ptr = self.data_ptr;
        ptr
    }
    /// Returns the stored data length of this entry.
    pub fn data_len(&self) -> i32 {
        let len = self.data_len;
        len
    }
    /// Returns the file name of this entry.
    pub fn filename(&self) -> &str {
        self.filename.as_str()
    }
}
/// Reader over an `index.fs` file.
///
/// It implements `Iterator` and yields one `IndexData` per entry, which
/// keeps the code needed to walk an `index.fs` file short.
pub struct IndexDataReader<R>
where
    R: BufRead + Seek,
{
    // Source the entries are read from.
    reader: R,
    // Scratch buffer reused for every file name that is read.
    buf: Vec<u8>,
    // Offset of the following entry; `None` before the first read and
    // after the last entry.
    next_ptr: Option<u64>,
}
impl<R> IndexDataReader<R>
where
    R: BufRead + Seek,
{
    /// Creates an `IndexDataReader` on top of `reader`.
    ///
    /// The struct is mainly meant to wrap a file, which is why `reader`
    /// must implement both `BufRead` and `Seek`.
    pub fn new(reader: R) -> Self {
        let buf = Vec::new();
        let next_ptr = None;
        Self {
            reader,
            buf,
            next_ptr,
        }
    }
    /// Consumes this `IndexDataReader` and turns it into an iterator over
    /// the matching `crowd.fs` file, which is read through `reader`.
    pub fn extract_crowd<C: BufRead + Seek>(self, reader: C) -> CrowdDataReader<C, Self> {
        let index_iter = self;
        CrowdDataReader::new(reader, index_iter)
    }
}
// IDEA: Yield Results instead of bare Index Data to propagate errors
// Or maybe not? If we read data from a file and an error occurs,
// how can we even recover? And if the source is a slice of bytes we can't even fail
impl<R: BufRead + Seek> Iterator for IndexDataReader<R> {
    type Item = IndexData;
    /// Reads the next entry from the underlying reader.
    ///
    /// Per entry this reads, in order: an `i32` pointer to the following
    /// entry, the `i32` data pointer, the `i32` data length, four bytes
    /// that are skipped, and a file name terminated by a `0x00` byte.
    ///
    /// Iteration stops when the stored pointer to the following entry is
    /// `0x00`, when a freshly read pointer is negative, or when reading the
    /// file name fails. Note that `None` in `next_ptr` doubles as the start
    /// state, so calling `next` again after it returned `None` because of a
    /// `0x00` pointer resumes reading at the reader's current position.
    ///
    /// # Panics
    ///
    /// Panics if seeking fails or if any of the integer reads fails.
    fn next(&mut self) -> Option<Self::Item> {
        // Start from an empty buffer so only the new file name ends up in it
        self.buf.clear();
        let following: Option<u64> = match self.next_ptr {
            // A stored pointer of 0x00 marks the entry we yielded last time
            // as the final one
            Some(0x00) => None,
            previous => {
                // On the very first call there is nowhere to seek to yet;
                // afterwards we jump to the entry the last one pointed at
                if let Some(offset) = previous {
                    self.reader
                        .seek(SeekFrom::Start(offset))
                        .expect("Should not exceed reader bounds");
                }
                let raw = self
                    .reader
                    .read_i32_le()
                    .expect("Next index should be valid");
                // A negative value cannot be a file offset and ends iteration
                u64::try_from(raw).ok()
            }
        };
        // The state has to be stored before we possibly bail out
        self.next_ptr = following;
        following?;
        let mut read_field = || {
            self.reader
                .read_i32_le()
                .expect("Should not exceed reader bounds")
        };
        let data_ptr = read_field();
        let data_len = read_field();
        // Four bytes of every entry go unread
        self.reader.seek(SeekFrom::Current(4)).unwrap();
        // A failure while reading the name ends iteration instead of panicking
        let filename = self
            .reader
            .read_string_until(0x00, &mut self.buf)
            .ok()?
            .into_owned();
        Some(IndexData {
            data_ptr,
            data_len,
            filename,
        })
    }
}
/// Struct that holds data from a single `crowd.fs` entry.
///
/// `data` may contain arbitrary file contents.
pub struct CrowdData {
filename: String,
data: Vec<u8>,
}
impl CrowdData {
/// Private constructor for `CrowdData` entries.
///
/// Should only be called by the `CrowdDataReader` iterator.
fn new(filename: String, data: Vec<u8>) -> Self {
Self { filename, data }
}
pub fn filename(&self) -> &str {
&self.filename
}
pub fn data(&self) -> &[u8] {
&self.data
}
/// Function that can decompress the contents of any `crowd.fs` file.
///
/// `compressed_crowd` is expected to begin with 0x60 as a magic number,
/// indicating that it is a DEFLATE-compressed `crowd.fs` entry.
fn decompress_crowd(compressed_crowd: &[u8]) -> Vec<u8> {
use flate2::bufread::DeflateDecoder;
assert_eq!(compressed_crowd[0], 0x60);
let decompressed_size = ((&compressed_crowd[..4]).get_i32_le() as usize) >> 8;
let mut buf = Vec::with_capacity(decompressed_size);
let mut d = DeflateDecoder::new(&compressed_crowd[4..]);
d.read_to_end(&mut buf).unwrap();
buf
}
pub fn try_into_btb(self) -> Option<BtbData> {
BtbData::new(self.filename, self.data)
}
}
/// Reader over a `crowd.fs` file.
///
/// It implements `Iterator` and yields one `CrowdData` per entry, which
/// keeps the code needed to walk a `crowd.fs` file short.
///
/// Because of how Bravely Default and Bravely Second lay out their files,
/// this struct wraps an iterator of `IndexData` structs (for instance an
/// `IndexDataReader`) and asks it where each entry of the `crowd.fs` file
/// is located.
pub struct CrowdDataReader<R, I>
where
    R: BufRead + Seek,
    I: Iterator<Item = IndexData>,
{
    // Source the entry contents are read from.
    reader: R,
    // Supplies location, length and name of every entry.
    index_iter: I,
    // Scratch buffer reused for the compressed bytes of every entry.
    buf: Vec<u8>,
}
impl<R, I> CrowdDataReader<R, I>
where
    R: BufRead + Seek,
    I: Iterator<Item = IndexData>,
{
    /// Private constructor; starts out with an empty scratch buffer.
    fn new(reader: R, index_iter: I) -> Self {
        let buf = Vec::new();
        Self {
            reader,
            index_iter,
            buf,
        }
    }
}
impl<R: BufRead + Seek, I: Iterator<Item = IndexData>> Iterator for CrowdDataReader<R, I> {
    type Item = CrowdData;
    /// Reads and decompresses the entry described by the next `IndexData`.
    ///
    /// Returns `None` once the wrapped index iterator is exhausted.
    ///
    /// # Panics
    ///
    /// Panics if the index entry holds a negative pointer or length, if
    /// seeking or reading fails, or if the contents cannot be decompressed.
    fn next(&mut self) -> Option<Self::Item> {
        // We extract the next entry from the `index.fs` iterator, then
        // convert its `data_len` and `data_ptr` fields into unsigned values.
        // If a conversion is lossy, we panic: this error is unrecoverable.
        let index = self.index_iter.next()?;
        let next_len: usize = index
            .data_len()
            .try_into()
            .expect("`data_len` of an index entry should not be negative");
        // A plain `as u64` cast would sign-extend a negative pointer into a
        // bogus, enormous offset; check it just like the length instead.
        let next_ptr = u64::try_from(index.data_ptr())
            .expect("`data_ptr` of an index entry should not be negative");
        self.buf.resize(next_len, 0);
        // We move to the next entry's location and then read its contents
        self.reader.seek(SeekFrom::Start(next_ptr)).unwrap();
        self.reader.read_exact(&mut self.buf).unwrap();
        // Finally, we decompress them and yield them to our caller
        Some(CrowdData::new(
            index.filename,
            CrowdData::decompress_crowd(&self.buf),
        ))
    }
}
}
/// This module exposes structs and functions to parse the data
/// contained inside extracted `crowd.fs` files.
///
/// At the moment, the primary focus is the text data contained
/// within `.btb` files.
pub mod data {
use bytes::Buf;
use std::iter::IntoIterator;
use std::ops::Range;
/// A single entry of a `.btb` file.
///
/// Entries can be built by hand through [`BtbEntry::new`] or obtained by
/// iterating over a `BtbData` struct via its `IntoIterator` implementation.
pub struct BtbEntry {
    // Binary part of the entry (maybe wrap in a new type?)
    bin: [u8; 20],
    // Command string of the entry
    cmd: String,
    // Text string of the entry
    text: String,
}
impl BtbEntry {
    /// Builds an entry out of its binary, command and text parts.
    pub fn new(bin: [u8; 20], cmd: String, text: String) -> Self {
        BtbEntry { bin, cmd, text }
    }
    /// Returns the binary part of this entry.
    pub fn bin(&self) -> &[u8; 20] {
        let Self { bin, .. } = self;
        bin
    }
    /// Returns the command string of this entry.
    pub fn cmd(&self) -> &str {
        self.cmd.as_str()
    }
    /// Returns the text string of this entry.
    pub fn text(&self) -> &str {
        self.text.as_str()
    }
}
/// Struct that holds the data for a `.btb` file in memory.
pub struct BtbData {
    filename: String,
    data: Vec<u8>,
    size: u32, // perhaps redundant as it is the size of `data`
    // ptr + len pairs stored as ranges for convenience's sake;
    // using self-referential slices into `data` would be ideal
    // but it is unsafe
    bin_range: Range<usize>,
    cmd_range: Range<usize>,
    text_range: Range<usize>,
    stride: u32,      // supposedly the no. of bytes per entry
    num_entries: u32, // could be used as a size hint for an iterator
}
impl BtbData {
    /// Constructor for `BtbData`. Returns an Option as this method
    /// performs basic checks for the `data` Vec to contain a valid
    /// `.btb` header.
    ///
    /// `None` is returned when:
    /// - `data` is too short to hold the whole 40-byte header,
    /// - `data` does not start with the ASCII string "BTBF",
    /// - the size stored in the header differs from `data.len()`,
    /// - any of the binary, command or text sections described by the
    ///   header does not lie within `data`.
    ///
    /// This method does not panic on malformed input.
    pub fn new(filename: String, data: Vec<u8>) -> Option<Self> {
        // magic (4) + size (4) + three ptr/len pairs (3 * 8)
        // + stride (4) + num_entries (4)
        const HEADER_LEN: usize = 40;
        // Some files, such as `.fscache`, have no contents, and anything
        // shorter than a full header cannot be parsed without running past
        // the end of the buffer; we'd have no data to extract, thus we
        // return None
        if data.len() < HEADER_LEN {
            return None;
        }
        let total_len = data.len();
        // We take the data we were given as a slice to be able
        // to use `bytes` methods on it to read its contents
        let mut bytes = data.as_slice();
        // Every `.btb` file begins with the ASCII-encoded string "BTBF"
        let magic = &bytes[..4];
        bytes.advance(4);
        if magic != b"BTBF" {
            return None;
        }
        // The length of the `data` Vec should match the metadata of its
        // header; a mismatch means the file is malformed
        let size = bytes.get_u32_le();
        if usize::try_from(size).ok()? != total_len {
            return None;
        }
        // Reads one ptr + len pair and turns it into a range. Conversions
        // are checked (so a `usize` narrower than 32 bits yields None
        // rather than a truncated value), the end is computed without
        // overflow, and the range must fit inside `data` so that slicing
        // with it later can never panic.
        let mut read_range = || -> Option<Range<usize>> {
            let ptr: usize = bytes.get_u32_le().try_into().ok()?;
            let len: usize = bytes.get_u32_le().try_into().ok()?;
            let end = ptr.checked_add(len)?;
            (end <= total_len).then_some(ptr..end)
        };
        let bin_range = read_range()?;
        let cmd_range = read_range()?;
        let text_range = read_range()?;
        let stride = bytes.get_u32_le();
        let num_entries = bytes.get_u32_le();
        Some(Self {
            filename,
            data,
            size,
            bin_range,
            cmd_range,
            text_range,
            stride,
            num_entries,
        })
    }
    /// Returns the name of the file this data was created with.
    pub fn filename(&self) -> &str {
        &self.filename
    }
    /// Returns the raw contents of the file, header included.
    pub fn data(&self) -> &[u8] {
        &self.data
    }
    /// Returns the size stored in the header.
    pub fn size(&self) -> u32 {
        self.size
    }
    /// Returns the stride stored in the header.
    pub fn stride(&self) -> u32 {
        self.stride
    }
    /// Returns the number of entries stored in the header.
    pub fn num_entries(&self) -> u32 {
        self.num_entries
    }
}
impl IntoIterator for BtbData {
    type Item = BtbEntry;
    type IntoIter = impl Iterator<Item = BtbEntry>;
    /// Turns this `BtbData` into an iterator over its entries.
    ///
    /// The iterator type is opaque because it is assembled from several
    /// iterator adapters.
    ///
    /// Every yielded entry owns a copy of the bytes and strings it was
    /// built from, for convenience's sake. This might change in the future.
    ///
    /// # Panics
    ///
    /// Panics if one of the stored section ranges does not lie within `data`.
    fn into_iter(self) -> Self::IntoIter {
        // A `.btb` file is effectively made of three streams: binary data,
        // command data and text data. The n-th entry of the file is the
        // n-th item of each stream. The streams are not interleaved in
        // memory, so we decode each one on its own and zip them together.
        let Self {
            data,
            bin_range,
            cmd_range,
            text_range,
            ..
        } = self;
        // Binary stream: fixed-size records of 20 bytes each
        let bins: Vec<[u8; 20]> = data[bin_range]
            .chunks_exact(20)
            .map(|chunk| <[u8; 20]>::try_from(chunk).unwrap())
            .collect();
        // Command stream: UTF-8 strings, each terminated by a NULL byte,
        // so splitting at 0x00 separates them
        let cmds: Vec<String> = String::from_utf8_lossy(&data[cmd_range])
            .split('\0')
            .map(String::from)
            .collect();
        // Text stream: UTF-16 strings, also NULL-terminated. Byte pairs are
        // first combined into little-endian `u16` units, then decoded and
        // split just like the command stream.
        // We could look into UTF-16 crates to read this data without
        // unnecessary cloning
        let units: Vec<u16> = data[text_range]
            .chunks_exact(2)
            .map(|unit| u16::from_le_bytes([unit[0], unit[1]]))
            .collect();
        let texts: Vec<String> = String::from_utf16_lossy(&units)
            .split('\0')
            .map(String::from)
            .collect();
        // Zipping stops at the shortest stream; each triple becomes an entry
        bins.into_iter()
            .zip(cmds)
            .zip(texts)
            .map(|((bin, cmd), text)| BtbEntry::new(bin, cmd, text))
    }
}
}