use std::fs::File;
use std::io::{self, BufReader, Read, Seek, SeekFrom};
use std::path::{Path, PathBuf};
use crate::format::SIGNATURE_HEADER_SIZE;
use crate::format::header::StartHeader;
use crate::{Error, Result};
/// A seekable byte stream (`Read + Seek`) that is backed by one or more
/// volume files and can describe those volumes to its caller.
pub trait VolumeReader: Read + Seek {
/// Returns how many volumes back this reader.
fn volume_count(&self) -> u32;
/// Returns the size of every volume as a slice, one entry per volume.
fn volume_sizes(&self) -> &[u64];
/// Returns the volume the reader is currently positioned in.
///
/// NOTE: the `MultiVolumeReader` implementation in this file reports a
/// 1-based number (its internal zero-based index plus one).
fn current_volume(&self) -> u32;
/// Returns the combined size of all volumes.
fn total_size(&self) -> u64;
}
/// Presents the numbered volume files of a split archive (`<base>.001`,
/// `<base>.002`, ...) as a single contiguous `Read + Seek` stream.
pub struct MultiVolumeReader {
// One slot per volume; a slot stays `None` until `open_volume` opens that file.
volumes: Vec<Option<BufReader<File>>>,
// Size of each volume in bytes, as reported by file metadata in `detect_volumes`.
volume_sizes: Vec<u64>,
// Path without the numeric volume suffix (for example `archive.7z`).
base_path: PathBuf,
// Logical offset inside the concatenated stream.
position: u64,
// Zero-based index of the volume that `position` currently falls into.
current_volume: usize,
// Offset inside `current_volume` that corresponds to `position`.
volume_position: u64,
// Sum of all entries of `volume_sizes`.
total_size: u64,
}
impl MultiVolumeReader {
pub fn open(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref();
let base_path = Self::detect_base_path(path)?;
let (volume_sizes, total_size) = Self::detect_volumes(&base_path)?;
if volume_sizes.is_empty() {
return Err(Error::InvalidFormat("No volume files found".to_string()));
}
Self::validate_complete_archive(&base_path, &volume_sizes, total_size)?;
Self::create_unchecked(base_path, volume_sizes, total_size)
}
/// Test-only variant of `open` that skips the start-header completeness
/// validation, so fixtures made of arbitrary bytes can be opened.
///
/// # Errors
///
/// Returns `Error::InvalidFormat` when the base path cannot be derived or
/// no volume file exists; propagates I/O errors from volume detection.
#[cfg(test)]
pub(crate) fn open_unchecked(path: impl AsRef<Path>) -> Result<Self> {
    let base_path = Self::detect_base_path(path.as_ref())?;
    let (volume_sizes, total_size) = Self::detect_volumes(&base_path)?;
    if !volume_sizes.is_empty() {
        Self::create_unchecked(base_path, volume_sizes, total_size)
    } else {
        Err(Error::InvalidFormat("No volume files found".to_string()))
    }
}
/// Builds a reader positioned at offset 0 from already-detected volume
/// information, without opening any file and without validation.
///
/// Every volume gets an empty (`None`) slot; the file behind a slot is
/// opened later by `open_volume` when it is first needed.
fn create_unchecked(
    base_path: PathBuf,
    volume_sizes: Vec<u64>,
    total_size: u64,
) -> Result<Self> {
    // One unopened slot per detected volume.
    let volumes = volume_sizes.iter().map(|_| None).collect();
    let reader = Self {
        volumes,
        volume_sizes,
        base_path,
        position: 0,
        current_volume: 0,
        volume_position: 0,
        total_size,
    };
    Ok(reader)
}
fn validate_complete_archive(
base_path: &Path,
volume_sizes: &[u64],
total_size: u64,
) -> Result<()> {
let first_volume_path = Self::volume_path_for(base_path, 1);
let mut file = File::open(&first_volume_path).map_err(Error::Io)?;
let start_header = StartHeader::parse(&mut file)?;
let expected_size =
SIGNATURE_HEADER_SIZE + start_header.next_header_offset + start_header.next_header_size;
if total_size < expected_size {
let mut cumulative: u64 = 0;
for &size in volume_sizes.iter() {
cumulative += size;
if cumulative >= expected_size {
return Ok(());
}
}
let missing_volume = (volume_sizes.len() + 1) as u32;
let missing_path = Self::volume_path_for(base_path, missing_volume);
return Err(Error::VolumeMissing {
volume: missing_volume,
path: missing_path.to_string_lossy().to_string(),
source: io::Error::new(io::ErrorKind::NotFound, "Volume file not found"),
});
}
Ok(())
}
/// Derives the volume base path (the path without the numeric suffix) from
/// a user-supplied path.
///
/// Two input shapes are accepted:
/// * a numbered volume such as `archive.7z.001`, which maps to `archive.7z`;
/// * the bare `archive.7z` name, accepted only if `archive.7z.001` exists.
///
/// # Errors
///
/// Returns `Error::InvalidFormat` for a `.7z` path without a `.001`
/// companion and for any path matching neither shape.
fn detect_base_path(path: &Path) -> Result<PathBuf> {
    let text = path.to_string_lossy();

    // Shape 1: "<base>.7z.<digits>", using the last ".7z." in the path.
    if let Some((stem, digits)) = text.rsplit_once(".7z.") {
        let is_volume_number = !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit());
        if is_volume_number {
            // Keep everything up to and including ".7z".
            let base_len = stem.len() + ".7z".len();
            return Ok(PathBuf::from(&text[..base_len]));
        }
    }

    if !text.ends_with(".7z") {
        return Err(Error::InvalidFormat(
            "Could not determine volume base path".to_string(),
        ));
    }

    // Shape 2: bare "<base>.7z"; only valid when the first volume exists.
    let first_volume = PathBuf::from(format!("{}.001", text));
    if first_volume.exists() {
        Ok(path.to_path_buf())
    } else {
        Err(Error::InvalidFormat(
            "Not a multi-volume archive (no .7z.001 found)".to_string(),
        ))
    }
}
fn detect_volumes(base_path: &Path) -> Result<(Vec<u64>, u64)> {
let mut sizes = Vec::new();
let mut total = 0u64;
let mut volume_num = 1u32;
loop {
let volume_path = Self::volume_path_for(base_path, volume_num);
match std::fs::metadata(&volume_path) {
Ok(meta) => {
let size = meta.len();
sizes.push(size);
total += size;
volume_num += 1;
}
Err(e) if e.kind() == io::ErrorKind::NotFound => break,
Err(e) => {
return Err(Error::Io(e));
}
}
}
Ok((sizes, total))
}
/// Builds the path of volume `num` by appending `.NNN` (zero-padded to at
/// least three digits) to `base`.
///
/// The suffix is appended at the `OsString` level, so a base path that is
/// not valid UTF-8 is preserved byte-for-byte instead of being altered by a
/// lossy string conversion (which would point at a different file).
fn volume_path_for(base: &Path, num: u32) -> PathBuf {
    let mut name = base.as_os_str().to_os_string();
    name.push(format!(".{:03}", num));
    PathBuf::from(name)
}
fn open_volume(&mut self, index: usize) -> Result<&mut BufReader<File>> {
if self.volumes[index].is_none() {
let path = Self::volume_path_for(&self.base_path, (index + 1) as u32);
let file = File::open(&path).map_err(|e| Error::VolumeMissing {
volume: (index + 1) as u32,
path: path.to_string_lossy().to_string(),
source: e,
})?;
self.volumes[index] = Some(BufReader::new(file));
}
Ok(self.volumes[index].as_mut().unwrap())
}
/// Translates a logical stream offset into `(volume index, offset inside
/// that volume)`, both zero-based.
///
/// An offset at or beyond the end of the last volume maps to the last
/// volume with an in-volume offset equal to that volume's size; with no
/// volumes at all the result is `(0, 0)`.
fn position_to_volume(&self, pos: u64) -> (usize, u64) {
    let mut offset = pos;
    for (index, size) in self.volume_sizes.iter().copied().enumerate() {
        match offset.checked_sub(size) {
            // The offset lies past this volume: skip it.
            Some(rest) => offset = rest,
            // `offset < size`: the position is inside this volume.
            None => return (index, offset),
        }
    }

    // Ran off the end: clamp to the end of the last volume.
    match self.volume_sizes.last() {
        Some(&last_size) => (self.volume_sizes.len() - 1, last_size),
        None => (0, 0),
    }
}
/// Returns the base path of the archive, i.e. the volume path without its
/// numeric suffix (for example `archive.7z`).
pub fn base_path(&self) -> &Path {
&self.base_path
}
/// Returns the path of the volume file with the given number, formed by
/// appending the number, zero-padded to three digits, to the base path.
///
/// The rest of this file numbers volumes from 1 (the first volume is
/// `.001`). The path is only computed; the file is not checked for existence.
pub fn get_volume_path(&self, volume_number: u32) -> PathBuf {
Self::volume_path_for(&self.base_path, volume_number)
}
pub fn verify_volumes(&self) -> Result<()> {
for i in 0..self.volume_sizes.len() {
let path = Self::volume_path_for(&self.base_path, (i + 1) as u32);
if !path.exists() {
return Err(Error::VolumeMissing {
volume: (i + 1) as u32,
path: path.to_string_lossy().to_string(),
source: io::Error::new(io::ErrorKind::NotFound, "Volume file not found"),
});
}
}
Ok(())
}
}
impl Read for MultiVolumeReader {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
if self.position >= self.total_size {
return Ok(0);
}
let mut total_read = 0;
let mut buf_offset = 0;
while buf_offset < buf.len() && self.position < self.total_size {
let current_volume_size = self.volume_sizes[self.current_volume];
let remaining_in_volume = current_volume_size - self.volume_position;
if remaining_in_volume == 0 {
self.current_volume += 1;
self.volume_position = 0;
if self.current_volume >= self.volumes.len() {
break;
}
continue;
}
let to_read = (buf.len() - buf_offset).min(remaining_in_volume as usize);
let seek_pos = self.volume_position;
let current_vol = self.current_volume;
let volume = self.open_volume(current_vol).map_err(io::Error::other)?;
volume.seek(SeekFrom::Start(seek_pos))?;
let n = volume.read(&mut buf[buf_offset..buf_offset + to_read])?;
if n == 0 {
break;
}
buf_offset += n;
total_read += n;
self.position += n as u64;
self.volume_position += n as u64;
}
Ok(total_read)
}
}
impl Seek for MultiVolumeReader {
    /// Moves the logical position and returns the new position.
    ///
    /// Targets beyond the end of the stream are clamped to `total_size`.
    ///
    /// # Errors
    ///
    /// Returns `io::ErrorKind::InvalidInput` when the target would lie
    /// before the start of the stream; the position is left unchanged.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        /// Applies a signed delta to an unsigned base without going through
        /// `i64`, so bases above `i64::MAX` and extreme deltas cannot
        /// overflow. Returns `None` only when the result would be negative.
        fn offset_by(base: u64, delta: i64) -> Option<u64> {
            if delta >= 0 {
                // Saturating is fine: the caller clamps to the stream size.
                Some(base.saturating_add(delta as u64))
            } else {
                base.checked_sub(delta.unsigned_abs())
            }
        }

        let target = match pos {
            SeekFrom::Start(offset) => Some(offset),
            SeekFrom::End(delta) => offset_by(self.total_size, delta),
            SeekFrom::Current(delta) => offset_by(self.position, delta),
        };

        let Some(target) = target else {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                "Cannot seek before start of stream",
            ));
        };

        self.position = target.min(self.total_size);
        let (vol_idx, vol_pos) = self.position_to_volume(self.position);
        self.current_volume = vol_idx;
        self.volume_position = vol_pos;
        Ok(self.position)
    }
}
/// Volume metadata accessors for `MultiVolumeReader`.
impl VolumeReader for MultiVolumeReader {
/// Number of detected volumes (length of `volume_sizes`, cast to `u32`).
fn volume_count(&self) -> u32 {
self.volume_sizes.len() as u32
}
/// Sizes of the detected volumes, in volume order.
fn volume_sizes(&self) -> &[u64] {
&self.volume_sizes
}
/// 1-based number of the volume the current position falls into
/// (the internal zero-based index plus one).
fn current_volume(&self) -> u32 {
(self.current_volume + 1) as u32
}
/// Combined size of all volumes.
fn total_size(&self) -> u64 {
self.total_size
}
}
impl std::fmt::Debug for MultiVolumeReader {
    /// Formats a summary of the reader: base path, volume count, total size,
    /// logical position and the 1-based current volume. The open file
    /// handles are not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let volume_count = self.volume_sizes.len();
        // Shown 1-based, matching `VolumeReader::current_volume`.
        let current_volume = self.current_volume + 1;

        let mut summary = f.debug_struct("MultiVolumeReader");
        summary.field("base_path", &self.base_path);
        summary.field("volume_count", &volume_count);
        summary.field("total_size", &self.total_size);
        summary.field("position", &self.position);
        summary.field("current_volume", &current_volume);
        summary.finish()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    /// Byte stored at `offset` inside the zero-based fixture volume `volume`.
    ///
    /// Each volume starts at a different value (0, 64, 128, ...), so a read
    /// that lands in the wrong volume produces visibly wrong data.
    fn expected_byte(volume: usize, offset: usize) -> u8 {
        ((volume * 64 + offset) % 256) as u8
    }

    /// Creates `<base_name>.001`, `<base_name>.002`, ... inside `dir` with
    /// the given sizes and returns the base path (without numeric suffix).
    fn create_test_volumes(dir: &Path, base_name: &str, sizes: &[usize]) -> PathBuf {
        let base_path = dir.join(base_name);
        for (i, &size) in sizes.iter().enumerate() {
            let volume_path = PathBuf::from(format!("{}.{:03}", base_path.display(), i + 1));
            let mut file = File::create(&volume_path).unwrap();
            let data: Vec<u8> = (0..size).map(|j| expected_byte(i, j)).collect();
            file.write_all(&data).unwrap();
        }
        base_path
    }

    #[test]
    fn test_detect_base_path() {
        let result = MultiVolumeReader::detect_base_path(Path::new("archive.7z.001"));
        assert_eq!(result.unwrap(), PathBuf::from("archive.7z"));
        let result = MultiVolumeReader::detect_base_path(Path::new("archive.7z.002"));
        assert_eq!(result.unwrap(), PathBuf::from("archive.7z"));
        let result = MultiVolumeReader::detect_base_path(Path::new("/path/to/archive.7z.123"));
        assert_eq!(result.unwrap(), PathBuf::from("/path/to/archive.7z"));
    }

    #[test]
    fn test_volume_path_generation() {
        let base = PathBuf::from("test.7z");
        assert_eq!(
            MultiVolumeReader::volume_path_for(&base, 1),
            PathBuf::from("test.7z.001")
        );
        assert_eq!(
            MultiVolumeReader::volume_path_for(&base, 10),
            PathBuf::from("test.7z.010")
        );
        assert_eq!(
            MultiVolumeReader::volume_path_for(&base, 100),
            PathBuf::from("test.7z.100")
        );
    }

    #[test]
    fn test_open_multivolume() {
        let dir = TempDir::new().unwrap();
        let base_path = create_test_volumes(dir.path(), "test.7z", &[100, 100, 50]);
        let reader =
            MultiVolumeReader::open_unchecked(format!("{}.001", base_path.display())).unwrap();
        assert_eq!(reader.volume_count(), 3);
        assert_eq!(reader.volume_sizes(), &[100, 100, 50]);
        assert_eq!(reader.total_size(), 250);
        assert_eq!(reader.current_volume(), 1);
    }

    #[test]
    fn test_read_across_volumes() {
        let dir = TempDir::new().unwrap();
        let base_path = create_test_volumes(dir.path(), "test.7z", &[100, 100, 50]);
        let mut reader =
            MultiVolumeReader::open_unchecked(format!("{}.001", base_path.display())).unwrap();
        let mut buffer = vec![0u8; 250];
        let n = reader.read(&mut buffer).unwrap();
        assert_eq!(n, 250);
        // First and last byte of every volume, in stream order.
        assert_eq!(buffer[0], expected_byte(0, 0));
        assert_eq!(buffer[99], expected_byte(0, 99));
        assert_eq!(buffer[100], expected_byte(1, 0));
        assert_eq!(buffer[199], expected_byte(1, 99));
        assert_eq!(buffer[200], expected_byte(2, 0));
        assert_eq!(buffer[249], expected_byte(2, 49));
    }

    #[test]
    fn test_read_after_seek_spans_volumes() {
        let dir = TempDir::new().unwrap();
        let base_path = create_test_volumes(dir.path(), "test.7z", &[100, 100, 50]);
        let mut reader =
            MultiVolumeReader::open_unchecked(format!("{}.001", base_path.display())).unwrap();
        reader.seek(SeekFrom::Start(95)).unwrap();
        let mut buffer = [0u8; 10];
        reader.read_exact(&mut buffer).unwrap();
        // Last five bytes of volume 1 followed by the first five of volume 2.
        let expected: Vec<u8> = (95..100)
            .map(|j| expected_byte(0, j))
            .chain((0..5).map(|j| expected_byte(1, j)))
            .collect();
        assert_eq!(&buffer[..], &expected[..]);
        assert_eq!(reader.current_volume(), 2);
    }

    #[test]
    fn test_seek_operations() {
        let dir = TempDir::new().unwrap();
        let base_path = create_test_volumes(dir.path(), "test.7z", &[100, 100, 50]);
        let mut reader =
            MultiVolumeReader::open_unchecked(format!("{}.001", base_path.display())).unwrap();
        let pos = reader.seek(SeekFrom::Start(150)).unwrap();
        assert_eq!(pos, 150);
        assert_eq!(reader.current_volume(), 2);
        let pos = reader.seek(SeekFrom::Start(0)).unwrap();
        assert_eq!(pos, 0);
        assert_eq!(reader.current_volume(), 1);
        let pos = reader.seek(SeekFrom::End(-50)).unwrap();
        assert_eq!(pos, 200);
        assert_eq!(reader.current_volume(), 3);
        reader.seek(SeekFrom::Start(100)).unwrap();
        let pos = reader.seek(SeekFrom::Current(25)).unwrap();
        assert_eq!(pos, 125);
    }

    #[test]
    fn test_seek_bounds() {
        let dir = TempDir::new().unwrap();
        let base_path = create_test_volumes(dir.path(), "test.7z", &[100, 100, 50]);
        let mut reader =
            MultiVolumeReader::open_unchecked(format!("{}.001", base_path.display())).unwrap();
        // Seeking before the start is rejected and leaves the position alone.
        assert!(reader.seek(SeekFrom::Current(-1)).is_err());
        assert_eq!(reader.seek(SeekFrom::Current(0)).unwrap(), 0);
        // Seeking past the end is clamped to the total size.
        assert_eq!(reader.seek(SeekFrom::Start(1000)).unwrap(), 250);
        let mut buffer = [0u8; 8];
        assert_eq!(reader.read(&mut buffer).unwrap(), 0);
    }

    #[test]
    fn test_no_volumes_error() {
        let dir = TempDir::new().unwrap();
        let result = MultiVolumeReader::open(dir.path().join("nonexistent.7z.001"));
        assert!(result.is_err());
    }

    #[test]
    fn test_position_to_volume() {
        let dir = TempDir::new().unwrap();
        let base_path = create_test_volumes(dir.path(), "test.7z", &[100, 100, 50]);
        let reader =
            MultiVolumeReader::open_unchecked(format!("{}.001", base_path.display())).unwrap();
        let (vol, off) = reader.position_to_volume(50);
        assert_eq!(vol, 0);
        assert_eq!(off, 50);
        let (vol, off) = reader.position_to_volume(100);
        assert_eq!(vol, 1);
        assert_eq!(off, 0);
        let (vol, off) = reader.position_to_volume(225);
        assert_eq!(vol, 2);
        assert_eq!(off, 25);
    }
}