use crate::error::JmdictError;
use crate::install::REQUIRED_FILES;
use flate2::read::GzDecoder;
use std::io::{Cursor, Read};
use std::path::{Path, PathBuf};
use tar::Archive;
/// Largest response body, in bytes, that `download` accepts: 50 MiB (50 * 2^20).
const MAX_DOWNLOAD_BYTES: usize = 50 << 20;
/// Largest declared size, in bytes, accepted for a single archive entry.
///
/// Same value as [`MAX_DOWNLOAD_BYTES`], widened to `u64` so it can be compared
/// with the size reported by a tar entry.
const MAX_ENTRY_BYTES: u64 = MAX_DOWNLOAD_BYTES as u64;
/// Fetches `url` with `ureq` and returns the whole response body as bytes.
///
/// The body is read through a limiter set to one byte past
/// `MAX_DOWNLOAD_BYTES`, so an oversized response is detected without
/// buffering more than the limit plus one byte.
///
/// # Errors
///
/// - `JmdictError::NetworkError` if the request fails, or if the body is
///   larger than `MAX_DOWNLOAD_BYTES`.
/// - `JmdictError::IoError` if reading the body fails.
pub(crate) fn download(url: &str) -> Result<Vec<u8>, JmdictError> {
    let response = match ureq::get(url).call() {
        Ok(response) => response,
        Err(err) => return Err(JmdictError::NetworkError(err.to_string())),
    };

    // Let one extra byte through so "exactly at the limit" can be told apart
    // from "over the limit".
    let read_cap = (MAX_DOWNLOAD_BYTES + 1) as u64;
    let mut body = Vec::new();
    if let Err(err) = response
        .into_reader()
        .take(read_cap)
        .read_to_end(&mut body)
    {
        return Err(JmdictError::IoError(err));
    }

    if body.len() <= MAX_DOWNLOAD_BYTES {
        Ok(body)
    } else {
        Err(JmdictError::NetworkError(format!(
            "tarball exceeds {MAX_DOWNLOAD_BYTES} byte limit"
        )))
    }
}
/// Unpacks an in-memory gzip-compressed tar archive into `target`.
///
/// `bytes` is the complete `.tar.gz` payload. Which entries are written, and
/// where, is decided by `extract_archive`.
///
/// # Errors
///
/// Returns whatever error `extract_archive` reports.
pub(crate) fn extract(bytes: &[u8], target: &Path) -> Result<(), JmdictError> {
    let reader = Cursor::new(bytes);
    let archive = Archive::new(GzDecoder::new(reader));
    extract_archive(archive, target)
}
/// Unpacks the gzip-compressed tar archive stored at `path` into `target`.
///
/// The file is opened and streamed through a gzip decoder; the entries are
/// then handled by `extract_archive`.
///
/// # Errors
///
/// Returns the I/O error from opening `path` (converted into `JmdictError`),
/// or whatever error `extract_archive` reports.
pub(crate) fn extract_from_path(path: &Path, target: &Path) -> Result<(), JmdictError> {
    // `?` performs the same `io::Error` -> `JmdictError` conversion via `From`.
    let tarball = std::fs::File::open(path)?;
    extract_archive(Archive::new(GzDecoder::new(tarball)), target)
}
/// Writes the required data files found in `archive` directly into `target`.
///
/// Entries are matched by the final component of their path only, so any
/// directory prefix inside the archive is ignored and each file is written as
/// `target/<name>`. An entry is skipped when:
///
/// - its path has no final component or that component is not valid UTF-8,
/// - its name is not listed in `REQUIRED_FILES`, or
/// - it carries no file data (directory, symlink, hard link, device, FIFO, ...).
///
/// If the archive contains several matching entries with the same name, the
/// later one overwrites the earlier one.
///
/// # Errors
///
/// - `JmdictError::DataCorrupted` if a matching entry declares a size larger
///   than `MAX_ENTRY_BYTES`.
/// - Any error raised while reading the archive or creating/writing the
///   destination file, converted into `JmdictError` by `?`.
fn extract_archive<R: Read>(mut archive: Archive<R>, target: &Path) -> Result<(), JmdictError> {
    for entry in archive.entries()? {
        let mut entry = entry?;
        let path = entry.path()?.into_owned();
        let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
            continue;
        };
        if !REQUIRED_FILES.contains(&name) {
            continue;
        }

        // Only entries that actually carry file contents may be written.
        // A directory, link or special entry that happens to share a required
        // name has no data; writing it would create an empty file and could
        // truncate a real file extracted earlier from the same archive.
        let kind = entry.header().entry_type();
        if !(kind.is_file() || kind.is_contiguous() || kind.is_gnu_sparse()) {
            continue;
        }

        // Reject oversized entries based on the declared size before touching
        // the filesystem.
        if entry.size() > MAX_ENTRY_BYTES {
            return Err(JmdictError::DataCorrupted);
        }

        let dest: PathBuf = target.join(name);
        let mut out = std::fs::File::create(&dest)?;
        std::io::copy(&mut entry, &mut out)?;
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::write::GzEncoder;
    use flate2::Compression;
    use std::io::Write;

    /// Builds an in-memory `.tar.gz` containing one GNU-format entry with mode
    /// `0o644` for each `(path, contents)` pair, in the given order.
    fn make_targz(entries: &[(&str, &[u8])]) -> Vec<u8> {
        let mut builder = tar::Builder::new(Vec::new());
        for &(path, contents) in entries {
            let mut header = tar::Header::new_gnu();
            header.set_path(path).unwrap();
            header.set_size(contents.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            builder.append(&header, contents).unwrap();
        }
        // `into_inner` finishes the archive and hands back the byte buffer.
        let tar_bytes = builder.into_inner().unwrap();

        let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
        encoder.write_all(&tar_bytes).unwrap();
        encoder.finish().unwrap()
    }

    /// Files named in the required set are written; anything else is ignored.
    #[test]
    fn extract_writes_only_required_files() {
        let dir = tempfile::tempdir().unwrap();
        let archive = make_targz(&[
            ("entries.bin", b"e"),
            ("README.md", b"ignore me"),
            ("kana.fst", b"k"),
        ]);

        extract(&archive, dir.path()).unwrap();

        for wanted in ["entries.bin", "kana.fst"] {
            assert!(dir.path().join(wanted).exists());
        }
        assert!(!dir.path().join("README.md").exists());
    }

    /// Entries nested under a directory are flattened into the target directory.
    #[test]
    fn extract_tolerates_nested_directory_layout() {
        let dir = tempfile::tempdir().unwrap();
        let archive = make_targz(&[
            ("dist/entries.bin", b"e"),
            ("dist/kana.fst", b"k"),
        ]);

        extract(&archive, dir.path()).unwrap();

        for wanted in ["entries.bin", "kana.fst"] {
            assert!(dir.path().join(wanted).exists());
        }
    }
}