use std::{
borrow::Cow,
cell::RefCell,
collections::HashMap,
fs::File,
hash::{BuildHasher, Hasher},
io::{BufReader, Read, Seek, SeekFrom},
path::Path,
};
use anyhow::{Context, anyhow};
use bytes::Bytes;
use iterators_extended::bucket::Bucket;
use crate::{
file_parsers::{
FileParser,
bundle::BundleParser,
bundle_index::{BundleIndexParser, types::BundleIndexFile},
ggpk::{
parser::parse_ggpk,
types::{Entry, EntryData, GGPKFile},
},
},
fs::FileSystem,
hasher::murmur64a::BuildMurmurHash64A,
path::parse_paths,
};
/// Location of one file's contents inside the GGPK archive file.
#[derive(Debug, Clone)]
struct FileInfo {
/// Byte position, measured from the start of the archive file, where the contents begin.
offset: usize,
/// Number of bytes that make up the contents.
length: usize,
}
/// File system view over a GGPK archive that addresses files by the hash of
/// their lowercased full path.
pub struct GGPKFS {
/// Buffered handle to the archive. Wrapped in a `RefCell` because reading
/// seeks the handle while only `&self` is available.
file: RefCell<BufReader<File>>,
/// Parsed archive index as returned by `parse_ggpk`.
index: GGPKFile,
/// Maps the `HASHER` hash of each file's lowercased full path to the location
/// of its contents in the archive.
lut: HashMap<u64, FileInfo>,
}
/// Hash builder shared by every path lookup in this file.
// NOTE(review): bundle lookups compare against hashes stored in the bundle index,
// so the seed presumably has to match the one used to produce them — confirm
// before changing it.
const HASHER: BuildMurmurHash64A = BuildMurmurHash64A { seed: 0x1337b33f };
/// Lazily walks `entries` (recursing into directories) and yields, for every
/// file, the `HASHER` hash of its lowercased full path together with the
/// location of its contents.
///
/// `prefix` is the already-accumulated parent path (ending in `/` for nested
/// directories); `None` is treated as the empty string.
fn enumerate_file_info(
    entries: &[Entry],
    prefix: Option<String>,
) -> impl Iterator<Item = (u64, FileInfo)> {
    let parent = prefix.unwrap_or_default();
    entries
        .iter()
        .flat_map(move |entry| -> Box<dyn Iterator<Item = (u64, FileInfo)>> {
            let full_path = format!("{parent}{}", entry.name);
            match &entry.data {
                EntryData::File { offset, length } => {
                    // Keys are always derived from the lowercased path bytes.
                    let key = HASHER.hash_one(full_path.to_lowercase().as_bytes());
                    let info = FileInfo {
                        offset: *offset,
                        length: *length,
                    };
                    Box::new(std::iter::once((key, info)))
                }
                // Directories contribute nothing themselves; descend with the
                // directory path (plus separator) as the new prefix.
                EntryData::Dir(children) => {
                    Box::new(enumerate_file_info(children, Some(full_path + "/")))
                }
            }
        })
}
impl GGPKFS {
pub fn new(ggpk_path: &Path) -> anyhow::Result<Self> {
let mut file = BufReader::new(File::open(ggpk_path)?);
let index = parse_ggpk(&mut file)?;
let lut = HashMap::from_iter(enumerate_file_info(&index.entries, None));
Ok(Self {
file: RefCell::new(file),
index,
lut,
})
}
fn _read(&self, offset: usize, length: usize) -> std::io::Result<Bytes> {
let mut file = self.file.borrow_mut();
file.seek(SeekFrom::Start(offset as u64))?;
let mut buf = vec![0; length];
file.read_exact(&mut buf)?;
let buf = Bytes::from(buf);
Ok(buf)
}
}
/// Lazily walks `entries` (recursing into directories) and yields the
/// lowercased full path of every file.
///
/// `prefix` is the already-accumulated parent path; `None` is treated as the
/// empty string.
fn enumerate_file_names(entries: &[Entry], prefix: Option<String>) -> impl Iterator<Item = String> {
    let parent = prefix.unwrap_or_default();
    entries
        .iter()
        .flat_map(move |entry| -> Box<dyn Iterator<Item = String>> {
            let full_path = format!("{parent}{}", entry.name);
            match &entry.data {
                EntryData::File { .. } => Box::new(std::iter::once(full_path.to_lowercase())),
                // Descend with the directory path (plus separator) as the new prefix.
                EntryData::Dir(children) => {
                    Box::new(enumerate_file_names(children, Some(full_path + "/")))
                }
            }
        })
}
impl FileSystem for GGPKFS {
    /// Lists the lowercased full path of every file in the archive index.
    fn list(&self) -> Box<dyn Iterator<Item = String> + '_> {
        let names = enumerate_file_names(&self.index.entries, None);
        Box::new(names)
    }

    /// Reads several files, yielding each requested path paired with its
    /// contents or an error.
    ///
    /// Paths missing from the lookup table are reported first; the remaining
    /// files are then read lazily in ascending archive-offset order, so the
    /// output order generally differs from the order of `paths`.
    #[allow(clippy::type_complexity)]
    fn batch_read<'a>(
        &'a self,
        paths: &'a [impl AsRef<str>],
    ) -> Box<dyn Iterator<Item = (Cow<'a, str>, anyhow::Result<Bytes>)> + 'a> {
        // Split the request into resolvable paths and lookup failures.
        let (mut found, missing) = paths
            .iter()
            .map(|requested| {
                let requested = requested.as_ref();
                let key = HASHER.hash_one(requested.to_lowercase().as_bytes());
                self.lut
                    .get(&key)
                    .map(|info| (requested, info))
                    .ok_or_else(|| {
                        (
                            requested,
                            Err(anyhow!("Path not found in index: {}", requested)),
                        )
                    })
            })
            .bucket_result();

        // Visit the archive front to back.
        found.sort_unstable_by_key(|(_, info)| info.offset);

        let loaded = found.into_iter().map(|(requested, info)| {
            let outcome = self
                ._read(info.offset, info.length)
                .context("Failed to read file");
            (requested, outcome)
        });

        let results = missing
            .into_iter()
            .chain(loaded)
            .map(|(requested, outcome)| (Cow::Borrowed(requested), outcome));
        Box::new(results)
    }

    /// Reads the file stored under `path` (matched case-insensitively via the
    /// lowercased path hash).
    ///
    /// # Errors
    /// Fails if the path is not in the lookup table or the underlying read fails.
    fn read(&self, path: &str) -> anyhow::Result<Bytes> {
        let key = HASHER.hash_one(path.to_lowercase().as_bytes());
        let info = self
            .lut
            .get(&key)
            .with_context(|| format!("Path not found in index: {}", path))?;
        Ok(self._read(info.offset, info.length)?)
    }
}
/// File system view over the files stored in bundles inside a GGPK archive.
pub struct GGPKBundleFS {
/// Underlying archive from which the bundle index and the bundle files are read.
ggpk: GGPKFS,
/// Maps each index entry's `hash` to its position in `index.files`.
lut: HashMap<u64, usize>,
/// Parsed bundle index, loaded from `/Bundles2/_.index.bin` in the archive.
index: BundleIndexFile,
}
impl GGPKBundleFS {
pub fn new(ggpk_path: &Path) -> anyhow::Result<Self> {
let ggpk = GGPKFS::new(ggpk_path)?;
let index_bytes = ggpk
.read("/Bundles2/_.index.bin")
.context("Failed to load bundle index from GGPK")?;
let index_bundle = BundleParser
.parse(&index_bytes)
.as_anyhow()
.context("Failed to parse bundle")?;
let index = BundleIndexParser
.parse(&index_bundle.read_all()?)
.as_anyhow()
.context("Failed to parse bundle as index")?;
let lut = index
.files
.iter()
.enumerate()
.map(|(i, f)| (f.hash, i))
.collect();
Ok(Self { ggpk, lut, index })
}
}
/// Computes the bundle-index lookup key for `path`: the lowercased path bytes
/// are written directly into a hasher built from `HASHER`.
fn bundle_path_hash(path: &str) -> u64 {
    let mut hasher = HASHER.build_hasher();
    hasher.write(path.to_lowercase().as_bytes());
    hasher.finish()
}

impl GGPKBundleFS {
    /// Builds the archive path of the bundle stored at position `bundle_index`
    /// in the index.
    ///
    /// # Errors
    /// Fails (instead of panicking) when `bundle_index` does not refer to a
    /// bundle listed in the index, e.g. because the index data is corrupt.
    fn bundle_file_path(&self, bundle_index: usize) -> anyhow::Result<String> {
        let bundle = self
            .index
            .bundles
            .get(bundle_index)
            .with_context(|| format!("Bundle index out of range: {}", bundle_index))?;
        Ok(format!("/Bundles2/{}.bundle.bin", bundle.name))
    }
}

impl FileSystem for GGPKBundleFS {
    /// Lists every path obtained by expanding the index's path entries with
    /// `parse_paths`.
    fn list(&self) -> Box<dyn Iterator<Item = String> + '_> {
        Box::new(
            self.index
                .paths
                .iter()
                .flat_map(|p| parse_paths(&self.index.path_rep_bundle, p).get_paths()),
        )
    }

    /// Reads several files, yielding each requested path paired with its
    /// contents or an error.
    ///
    /// Paths missing from the index are reported first. The remaining files
    /// are grouped by bundle so each bundle is loaded and parsed once; groups
    /// are visited in unspecified (hash map) order, so the output order
    /// generally differs from the order of `paths`. If a bundle cannot be
    /// located, loaded or parsed, every file requested from it gets an error.
    fn batch_read<'a>(
        &'a self,
        paths: &'a [impl AsRef<str>],
    ) -> Box<dyn Iterator<Item = (Cow<'a, str>, anyhow::Result<Bytes>)> + 'a> {
        let (fileinfos, errors) = paths
            .iter()
            .map(|path| {
                let path = path.as_ref();
                let hash = bundle_path_hash(path);
                // `lut` values are positions produced by enumerating
                // `index.files`, so this index is always in bounds.
                match self.lut.get(&hash).map(|i| &self.index.files[*i]) {
                    Some(f) => Ok((path, f)),
                    None => Err((path, Err(anyhow!("Path not found in index: {}", path)))),
                }
            })
            .bucket_result();

        // Group the requested files by the bundle that contains them.
        let fileinfos =
            fileinfos
                .into_iter()
                .fold(HashMap::<_, Vec<_>>::new(), |mut acc, (path, fileinfo)| {
                    acc.entry(fileinfo.bundle_index)
                        .or_default()
                        .push((path, fileinfo));
                    acc
                });

        let file_contents = fileinfos.into_iter().flat_map(|(bundle_index, files)| {
            let bundle = self
                .bundle_file_path(bundle_index as usize)
                .and_then(|bundle_path| {
                    self.ggpk
                        .read(&bundle_path)
                        .with_context(|| format!("Failed to load bundle file: {:?}", bundle_path))
                })
                .and_then(|bundle_contents| {
                    BundleParser
                        .parse(&bundle_contents)
                        .as_anyhow()
                        .context("Failed to parse bundle")
                });
            let contents: Box<dyn Iterator<Item = _>> = match bundle {
                Ok(b) => Box::new(files.into_iter().map(move |(path, file)| {
                    (path, b.read_range(file.offset as usize, file.size as usize))
                })),
                // The error value cannot be duplicated, so each affected file
                // receives a fresh error carrying its debug rendering.
                Err(e) => Box::new(
                    files
                        .into_iter()
                        .map(move |(path, _)| (path, Err(anyhow!("{:?}", e)))),
                ),
            };
            contents
        });

        Box::new(
            errors
                .into_iter()
                .chain(file_contents)
                .map(|(path, r)| (Cow::Borrowed(path), r)),
        )
    }

    /// Reads the file stored under `path` (matched case-insensitively via the
    /// lowercased path hash) out of the bundle that contains it.
    ///
    /// # Errors
    /// Fails if the path is not in the index, the entry refers to a bundle
    /// that is not listed in the index, the bundle cannot be loaded or parsed,
    /// or the requested range cannot be read from it.
    fn read(&self, path: &str) -> anyhow::Result<Bytes> {
        let hash = bundle_path_hash(path);
        let index = self
            .lut
            .get(&hash)
            .with_context(|| format!("Path not found in index: {}", path))?;
        // `lut` values are positions produced by enumerating `index.files`,
        // so this index is always in bounds.
        let file = &self.index.files[*index];
        let bundle_path = self.bundle_file_path(file.bundle_index as usize)?;
        let bundle_contents = self
            .ggpk
            .read(&bundle_path)
            .with_context(|| format!("Failed to load bundle file: {:?}", bundle_path))?;
        let bundle = BundleParser
            .parse(&bundle_contents)
            .as_anyhow()
            .context("Failed to parse bundle")?;
        let content = bundle.read_range(file.offset as usize, file.size as usize)?;
        Ok(content)
    }
}