// wt_blk 0.3.1
//
// Parser and unpacker for the BLK file format
// Documentation
use std::{io::Read, isize, iter::once, ops::Add, sync::Arc};

use color_eyre::{eyre::ContextCompat, Report};
use itertools::Itertools;
use zstd::Decoder;

use crate::blk::{blk_string::BlkString, leb128::uleb128_offset};

/// Collection of strings shared across an entire VROMF file.
///
/// It's usually located in the top-level directory and called `nm`; in the binary vromf it
/// shows up as `0xff 0x3f nm` (prefixed with a pair of seemingly random bytes).
#[derive(Debug, Clone)]
pub struct NameMap {
	/// Raw name-map bytes. [`NameMap::from_encoded_file`] stores the zstd-decompressed
	/// payload of the `nm` file here.
	pub binary: Vec<u8>,
	/// Names parsed out of the name map, indexable via [`NameMap::idx_parsed`].
	/// Held in an `Arc`, so cloning a `NameMap` shares this list instead of copying it.
	pub parsed: Arc<Vec<BlkString>>,
}

impl NameMap {
	pub fn idx_parsed(&self, idx: usize) -> Option<&BlkString> {
		self.parsed.get(idx)
	}

	pub fn from_encoded_file(file: &[u8]) -> Result<Self, Report> {
		let decoded = Self::decode_nm_file(file)?;

		let names = Self::parse_slim_nm(&decoded)?;

		Ok(Self {
			parsed: Arc::new(names),
			binary: decoded,
		})
	}

	pub fn decode_nm_file(file: &[u8]) -> Result<Vec<u8>, Report> {
		let _names_digest = &file.get(0..8).context(format!(
			"File out of bounds for range 0..8, found len: {}",
			file.len()
		))?;
		let _dict_digest = &file[8..40];
		let mut zstd_stream = &file[40..];
		let mut decoder = Decoder::new(&mut zstd_stream)?;
		let mut out = Vec::with_capacity(file.len());
		let _ = decoder.read_to_end(&mut out)?;
		Ok(out)
	}

	pub fn parse_name_section(file: &[u8]) -> Vec<BlkString> {
		once(-1_isize)
			.chain(memchr::memchr_iter(b'\0', file).map(|u| u as isize))
			.tuple_windows::<(isize, isize)>()
			.map(|(start, end)| {
				BlkString::from_lossy(&file[(start.add(1) as usize)..(end as usize)])
			})
			.collect()
	}

	pub fn parse_slim_nm(name_map: &[u8]) -> color_eyre::Result<Vec<BlkString>> {
		let mut nm_ptr = 0;

		let names_count = uleb128_offset(&name_map[nm_ptr..], &mut nm_ptr)?;

		let names_data_size = uleb128_offset(&name_map[nm_ptr..], &mut nm_ptr)?;

		let names = NameMap::parse_name_section(&name_map[nm_ptr..(nm_ptr + names_data_size)]);

		if names_count != names.len() {
			panic!("Should be equal"); // TODO: Change to result when fn signature allows for it
		}

		Ok(names)
	}
}

#[cfg(test)]
mod test {
	use std::fs;

	use crate::blk::{leb128::uleb128, name_map::NameMap};

	/// A stream of three NUL-terminated entries is split into exactly those three strings.
	#[test]
	fn test_any_stream() {
		let parsed: Vec<String> = NameMap::parse_name_section(b"a\0b\0c\0")
			.iter()
			.map(|name| name.to_string())
			.collect();
		assert_eq!(vec!["a", "b", "c"], parsed)
	}

	/// Decoding the sample `nm` file reproduces the reference `names` file byte for byte.
	#[test]
	fn test_nm_file() {
		let encoded = fs::read("./samples/nm").unwrap();
		let actual = NameMap::decode_nm_file(&encoded).unwrap();
		let expected = fs::read("./samples/names").unwrap();
		assert_eq!(&expected, &actual)
	}

	/// Runs the byte-by-byte reference splitter over a real name map.
	///
	/// The comparison against a `split`-based variant is currently disabled (see the
	/// commented-out code at the end), so this test only checks that decoding and the
	/// header reads succeed.
	#[test]
	fn nm_parity() {
		let encoded = fs::read("../wt_blk/samples/rendist/nm").unwrap();
		let nm = NameMap::decode_nm_file(&encoded).unwrap();

		// Header: name count followed by the byte size of the name section.
		let (count_len, _names_count) = uleb128(&nm[..]).unwrap();
		let (size_len, names_data_size) = uleb128(&nm[count_len..]).unwrap();
		let section_start = count_len + size_len;
		let section = &nm[section_start..(section_start + names_data_size)];

		let mut pending = vec![];
		let mut _old = vec![];
		for &byte in section {
			if byte != 0 {
				pending.push(byte);
				continue;
			}
			// Terminator reached: flush the accumulated bytes as one name. Invalid UTF-8
			// is only printed, not collected.
			match String::from_utf8(std::mem::take(&mut pending)) {
				Ok(good) => _old.push(good),
				Err(bad) => println!("{:?}", String::from_utf8_lossy(bad.as_bytes())),
			}
		}
		// let new = {
		// 	nm[nm_ptr..(nm_ptr + names_data_size)].split(|b| *b == 0).map(|bs| String::from_utf8_lossy(bs).to_string()).collect::<Vec<_>>()
		// };
		// assert_eq!(old, new);
	}
}