use chrono::NaiveDate;
use chrono::NaiveDateTime;
use chrono::NaiveTime;
use conv::{NoError, ValueFrom};
use curl::easy::Easy;
use num_traits::ToPrimitive;
use serde::Deserialize;
use serde::Serialize;
use std::cell::RefCell;
use std::fs::File;
use std::io;
use std::io::BufReader;
use std::io::BufWriter;
use std::io::ErrorKind;
use std::path::Path;
use std::time::Duration;
use thiserror::Error;
use zip::result::ZipError;
use super::utils;
use zip::ZipArchive;
/// Errors returned by [`PartialZip`] and [`PartialReader`].
#[derive(Error, Debug)]
pub enum PartialZipError {
/// The URL was rejected by `utils::url_is_valid` before any request was made.
#[error("Invalid URL")]
InvalidUrl,
/// A file could not be found.
/// NOTE(review): no code visible in this file constructs this variant — confirm where it is raised.
#[error("File Not Found")]
FileNotFound,
/// The `0-0` range probe did not come back as exactly one byte with HTTP status 206.
#[error("Range request not supported")]
RangeNotSupported,
/// A compression method that cannot be handled; the payload is the value shown in the message
/// (presumably the numeric method id — confirm against the code that raises it).
#[error("{0} is a Unsupported Compression")]
UnsupportedCompression(u16),
/// An error reported by the `zip` crate (converted automatically via `From`).
#[error("zip error: {0}")]
ZipRsError(#[from] ZipError),
/// An I/O error (converted automatically via `From`).
#[error("io error: {0}")]
IOError(#[from] io::Error),
/// An error reported by curl (converted automatically via `From`).
#[error("CURL error: {0}")]
CURLError(#[from] curl::Error),
/// Wrapper for `conv::NoError` (converted automatically via `From`).
#[error("NoError error: {0}")]
NoError(#[from] NoError),
/// A numeric conversion overflowed (converted automatically via `From`).
#[error("Conversion error: {0}")]
ConvError(#[from] conv::PosOverflow<u64>),
}
/// Default value for [`PartialZipOptions::max_redirects`].
pub const DEFAULT_MAX_REDIRECTS: u32 = 10;
/// Default value, in seconds, for [`PartialZipOptions::connect_timeout`].
pub const DEFAULT_CONNECT_TIMEOUT_SECS: u64 = 30;
/// Default value, in seconds, for [`PartialZipOptions::tcp_keepidle`].
pub const DEFAULT_TCP_KEEPIDLE_SECS: u64 = 120;
/// Default value, in seconds, for [`PartialZipOptions::tcp_keepintvl`].
pub const DEFAULT_TCP_KEEPINTVL_SECS: u64 = 60;

/// Connection settings used when opening a remote archive.
///
/// Build one with [`PartialZipOptions::new`] (or `Default`) and chain the
/// builder methods; every builder consumes `self` and returns the updated value.
#[derive(Debug, Clone)]
pub struct PartialZipOptions {
    /// Probe the server with a one-byte range request before using it.
    pub check_range: bool,
    /// Maximum number of redirects to follow.
    pub max_redirects: u32,
    /// Connection timeout; `None` leaves the transfer handle's own default in place.
    pub connect_timeout: Option<Duration>,
    /// TCP keepalive idle duration handed to the transfer handle.
    pub tcp_keepidle: Duration,
    /// TCP keepalive interval handed to the transfer handle.
    pub tcp_keepintvl: Duration,
    /// Optional `(username, password)` for the server.
    pub basic_auth: Option<(String, String)>,
    /// Optional proxy URL.
    pub proxy: Option<String>,
    /// Optional `(username, password)` for the proxy.
    pub proxy_auth: Option<(String, String)>,
}

impl Default for PartialZipOptions {
    /// Range checking off, the `DEFAULT_*` constants applied, no credentials and no proxy.
    fn default() -> Self {
        let connect_timeout = Duration::from_secs(DEFAULT_CONNECT_TIMEOUT_SECS);
        let tcp_keepidle = Duration::from_secs(DEFAULT_TCP_KEEPIDLE_SECS);
        let tcp_keepintvl = Duration::from_secs(DEFAULT_TCP_KEEPINTVL_SECS);
        Self {
            check_range: false,
            max_redirects: DEFAULT_MAX_REDIRECTS,
            connect_timeout: Some(connect_timeout),
            tcp_keepidle,
            tcp_keepintvl,
            basic_auth: None,
            proxy: None,
            proxy_auth: None,
        }
    }
}

impl PartialZipOptions {
    /// Creates the default option set (same as [`Default::default`]).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Enables or disables the range-support probe.
    #[must_use]
    pub const fn check_range(mut self, check: bool) -> Self {
        self.check_range = check;
        self
    }

    /// Sets the maximum number of redirects to follow.
    #[must_use]
    pub const fn max_redirects(mut self, max: u32) -> Self {
        self.max_redirects = max;
        self
    }

    /// Sets the connection timeout; `None` means no explicit timeout is configured.
    #[must_use]
    pub const fn connect_timeout(mut self, timeout: Option<Duration>) -> Self {
        self.connect_timeout = timeout;
        self
    }

    /// Sets the TCP keepalive idle duration.
    #[must_use]
    pub const fn tcp_keepidle(mut self, duration: Duration) -> Self {
        self.tcp_keepidle = duration;
        self
    }

    /// Sets the TCP keepalive interval.
    #[must_use]
    pub const fn tcp_keepintvl(mut self, duration: Duration) -> Self {
        self.tcp_keepintvl = duration;
        self
    }

    /// Sets the username and password sent to the server.
    #[must_use]
    pub fn basic_auth(self, username: &str, password: &str) -> Self {
        let credentials = (username.to_owned(), password.to_owned());
        Self {
            basic_auth: Some(credentials),
            ..self
        }
    }

    /// Sets the proxy URL.
    #[must_use]
    pub fn proxy(self, url: &str) -> Self {
        Self {
            proxy: Some(url.to_owned()),
            ..self
        }
    }

    /// Sets the username and password sent to the proxy.
    #[must_use]
    pub fn proxy_auth(self, username: &str, password: &str) -> Self {
        let credentials = (username.to_owned(), password.to_owned());
        Self {
            proxy_auth: Some(credentials),
            ..self
        }
    }
}
/// Handle to a zip archive that is read remotely through a [`PartialReader`].
#[derive(Debug)]
pub struct PartialZip {
// URL the archive was opened from.
url: String,
// Parsed archive on top of a buffered `PartialReader`; the `RefCell` lets `&self`
// methods obtain the mutable access that reading entries requires.
archive: RefCell<ZipArchive<BufReader<PartialReader>>>,
// Size of the remote file in bytes, copied from the reader at construction time.
file_size: u64,
}
/// Compression method of an archive entry, as exposed by this module.
///
/// Converted from `zip::CompressionMethod`; every method other than the four
/// named ones maps to [`PartialZipCompressionMethod::Unsupported`].
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PartialZipCompressionMethod {
/// Corresponds to `zip::CompressionMethod::Stored`.
Stored,
/// Corresponds to `zip::CompressionMethod::Deflated`.
Deflated,
/// Corresponds to `zip::CompressionMethod::Bzip2`.
Bzip2,
/// Corresponds to `zip::CompressionMethod::Zstd`.
Zstd,
/// Any other compression method.
Unsupported,
}
impl From<zip::CompressionMethod> for PartialZipCompressionMethod {
fn from(value: zip::CompressionMethod) -> Self {
match value {
zip::CompressionMethod::Stored => Self::Stored,
zip::CompressionMethod::Deflated => Self::Deflated,
zip::CompressionMethod::Bzip2 => Self::Bzip2,
zip::CompressionMethod::Zstd => Self::Zstd,
_ => Self::Unsupported,
}
}
}
/// Metadata describing one entry of the remote archive, as produced by `PartialZip::list_detailed`.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PartialZipFileDetailed {
/// Entry name as stored in the archive.
pub name: String,
/// Size of the entry's compressed data in bytes.
pub compressed_size: u64,
/// Compression method of the entry.
pub compression_method: PartialZipCompressionMethod,
/// `true` when the method is stored, deflated, bzip2 or zstd.
pub supported: bool,
/// Last-modified timestamp; `None` when the entry has none or its fields do not form a valid date and time.
pub last_modified: Option<NaiveDateTime>,
}
impl PartialZip {
/// Opens the remote archive at `url` using the default [`PartialZipOptions`].
///
/// # Errors
/// Returns whatever [`PartialZip::new_with_options`] returns for the default options.
pub fn new(url: &str) -> Result<Self, PartialZipError> {
    let options = PartialZipOptions::default();
    Self::new_with_options(url, &options)
}
/// Opens the remote archive at `url` with default options, except that the
/// range-support probe is switched on or off by `check_range`.
///
/// # Errors
/// Returns whatever [`PartialZip::new_with_options`] returns for those options.
pub fn new_check_range(url: &str, check_range: bool) -> Result<Self, PartialZipError> {
    let options = PartialZipOptions::default().check_range(check_range);
    Self::new_with_options(url, &options)
}
/// Opens the remote archive at `url` with the given connection options.
///
/// A [`PartialReader`] is created for the URL, wrapped in a buffered reader
/// and handed to `ZipArchive::new`, which parses the archive structure.
///
/// # Errors
/// Fails if the reader cannot be created or the archive cannot be parsed.
pub fn new_with_options(
    url: &str,
    options: &PartialZipOptions,
) -> Result<Self, PartialZipError> {
    // Capacity of the buffer placed in front of the remote reader (1 MiB).
    const READ_BUFFER_CAPACITY: usize = 0x0010_0000;

    let remote = PartialReader::new_with_options(url, options)?;
    // Read the size before the reader is moved into the buffer.
    let file_size = remote.file_size;
    let buffered = BufReader::with_capacity(READ_BUFFER_CAPACITY, remote);
    let parsed = ZipArchive::new(buffered)?;
    Ok(Self {
        url: url.to_owned(),
        archive: RefCell::new(parsed),
        file_size,
    })
}
/// Returns the URL this archive was opened from.
#[must_use]
pub fn url(&self) -> &str {
    self.url.as_str()
}
/// Returns the size of the remote archive in bytes, as recorded when it was opened.
pub const fn file_size(&self) -> u64 {
self.file_size
}
/// Returns the names of all entries in the archive as owned strings.
pub fn list_names(&self) -> Vec<String> {
    let archive = self.archive.borrow();
    let names: Vec<String> = archive.file_names().map(String::from).collect();
    names
}
/// Returns metadata for every entry that can be opened by index.
///
/// Entries that fail to open are skipped after a warning is logged, so the
/// result may hold fewer items than the archive has entries.
pub fn list_detailed(&self) -> Vec<PartialZipFileDetailed> {
    let mut archive = self.archive.borrow_mut();
    let num_files = archive.len();
    let mut file_list = Vec::new();
    for i in 0..num_files {
        let file = match archive.by_index(i) {
            Ok(file) => file,
            Err(e) => {
                log::warn!("list: error while matching file by index: {i} - {e}");
                continue;
            }
        };
        let compression_method = file.compression();
        let supported = matches!(
            compression_method,
            zip::CompressionMethod::Stored
                | zip::CompressionMethod::Deflated
                | zip::CompressionMethod::Bzip2
                | zip::CompressionMethod::Zstd
        );
        // A timestamp is reported only when both the date and the time parts are valid.
        let last_modified = file.last_modified().and_then(|datetime| {
            let date = NaiveDate::from_ymd_opt(
                datetime.year().into(),
                datetime.month().into(),
                datetime.day().into(),
            )?;
            let time = NaiveTime::from_hms_opt(
                datetime.hour().into(),
                datetime.minute().into(),
                datetime.second().into(),
            )?;
            Some(NaiveDateTime::new(date, time))
        });
        file_list.push(PartialZipFileDetailed {
            name: file.name().to_string(),
            compressed_size: file.compressed_size(),
            compression_method: compression_method.into(),
            supported,
            last_modified,
        });
    }
    file_list
}
/// Downloads the entry named `filename` and returns its contents in memory.
///
/// # Errors
/// Propagates any error from [`PartialZip::download_to_write`].
pub fn download(&self, filename: &str) -> Result<Vec<u8>, PartialZipError> {
    let mut buffer: Vec<u8> = Vec::new();
    self.download_to_write(filename, &mut buffer)
        .map(|_| buffer)
}
pub fn download_to_file(
&self,
filename: &str,
output_path: &Path,
) -> Result<u64, PartialZipError> {
let file = File::create(output_path)?;
let mut writer = BufWriter::new(file);
self.download_to_write(filename, &mut writer)
}
pub fn download_to_write(
&self,
filename: &str,
writer: &mut dyn std::io::Write,
) -> Result<u64, PartialZipError> {
let mut archive = self.archive.borrow_mut();
let mut file = archive.by_name(filename)?;
let bytes_written = io::copy(&mut file, writer)?;
Ok(bytes_written)
}
/// Downloads the entry named `filename` into memory while showing a progress bar.
///
/// # Errors
/// Propagates any error from [`PartialZip::download_to_write_with_progressbar`].
#[cfg(feature = "progressbar")]
pub fn download_with_progressbar(&self, filename: &str) -> Result<Vec<u8>, PartialZipError> {
    let mut buffer: Vec<u8> = Vec::new();
    self.download_to_write_with_progressbar(filename, &mut buffer)
        .map(|_| buffer)
}
/// Downloads the entry named `filename` into a newly created file at
/// `output_path` while showing a progress bar.
///
/// Returns the number of bytes written.
///
/// # Errors
/// Fails if the file cannot be created, the entry cannot be read, or the
/// data cannot be written out completely.
#[cfg(feature = "progressbar")]
pub fn download_to_file_with_progressbar(
    &self,
    filename: &str,
    output_path: &Path,
) -> Result<u64, PartialZipError> {
    let file = File::create(output_path)?;
    let mut writer = BufWriter::new(file);
    let bytes_written = self.download_to_write_with_progressbar(filename, &mut writer)?;
    // Flush explicitly: dropping a `BufWriter` ignores flush errors, which
    // would report success for a file whose tail never reached the disk.
    io::Write::flush(&mut writer)?;
    Ok(bytes_written)
}
/// Streams the entry named `filename` into `writer` while showing a progress bar.
///
/// Returns the number of bytes copied.
///
/// # Errors
/// Fails if the entry cannot be opened by name or if copying fails.
#[cfg(feature = "progressbar")]
pub fn download_to_write_with_progressbar(
    &self,
    filename: &str,
    writer: &mut dyn std::io::Write,
) -> Result<u64, PartialZipError> {
    use indicatif::ProgressBar;
    let mut archive = self.archive.borrow_mut();
    let file = archive.by_name(filename)?;
    // The wrapped reader yields *decompressed* bytes, so the bar must be sized
    // by the uncompressed length; sizing it by the compressed length makes the
    // bar run past its end for every compressed entry.
    let pb = ProgressBar::new(file.size());
    let bytes_written = io::copy(&mut pb.wrap_read(file), writer)?;
    Ok(bytes_written)
}
/// Downloads several entries into memory.
///
/// Returns one `(name, contents)` pair per requested name, in request order.
///
/// # Errors
/// Stops at the first entry that fails and returns its error.
pub fn download_multiple(
    &self,
    filenames: &[&str],
) -> Result<Vec<(String, Vec<u8>)>, PartialZipError> {
    let mut downloads = Vec::with_capacity(filenames.len());
    for &name in filenames {
        let data = self.download(name)?;
        downloads.push((name.to_owned(), data));
    }
    Ok(downloads)
}
/// Downloads several entries into `output_dir`.
///
/// Each entry is written under the final component of its archive path; if
/// the path has no final component, the whole name is used instead.
/// Returns the total number of bytes written.
///
/// # Errors
/// Stops at the first entry that fails and returns its error.
pub fn download_multiple_to_dir(
    &self,
    filenames: &[&str],
    output_dir: &Path,
) -> Result<u64, PartialZipError> {
    filenames.iter().try_fold(
        0u64,
        |written, &entry| -> Result<u64, PartialZipError> {
            let base_name = Path::new(entry)
                .file_name()
                .unwrap_or_else(|| std::ffi::OsStr::new(entry));
            let target = output_dir.join(base_name);
            let bytes = self.download_to_file(entry, &target)?;
            Ok(written + bytes)
        },
    )
}
/// Downloads several entries into `output_dir`, showing a progress bar per entry.
///
/// Each entry is written under the final component of its archive path; if
/// the path has no final component, the whole name is used instead.
/// Returns the total number of bytes written.
///
/// # Errors
/// Stops at the first entry that fails and returns its error.
#[cfg(feature = "progressbar")]
pub fn download_multiple_to_dir_with_progressbar(
    &self,
    filenames: &[&str],
    output_dir: &Path,
) -> Result<u64, PartialZipError> {
    let mut written = 0u64;
    for &entry in filenames {
        let base_name = match Path::new(entry).file_name() {
            Some(name) => name,
            None => std::ffi::OsStr::new(entry),
        };
        let target = output_dir.join(base_name);
        written += self.download_to_file_with_progressbar(entry, &target)?;
    }
    Ok(written)
}
}
/// Reader over a remote file that implements `io::Read` and `io::Seek`
/// by issuing one range request per read.
#[derive(Debug)]
pub struct PartialReader {
// URL of the remote file.
url: String,
// Size of the remote file in bytes, taken from the download content length at construction.
file_size: u64,
// curl handle reused for every request.
easy: Easy,
// Current read position; changed by `read` and `seek`.
pos: u64,
}
// HTTP status code expected from the range probe in `PartialReader::new_with_options`.
const HTTP_PARTIAL_CONTENT: u32 = 206;
impl PartialReader {
pub fn new(url: &str) -> Result<Self, PartialZipError> {
Self::new_with_options(url, &PartialZipOptions::default())
}
pub fn new_check_range(url: &str, check_range: bool) -> Result<Self, PartialZipError> {
Self::new_with_options(url, &PartialZipOptions::default().check_range(check_range))
}
pub fn new_with_options(
url: &str,
options: &PartialZipOptions,
) -> Result<Self, PartialZipError> {
if !utils::url_is_valid(url) {
return Err(PartialZipError::InvalidUrl);
}
let mut easy = Easy::new();
easy.url(url)?;
easy.follow_location(true)?;
easy.max_redirections(options.max_redirects)?;
if let Some(timeout) = options.connect_timeout {
easy.connect_timeout(timeout)?;
}
easy.tcp_keepalive(true)?;
easy.tcp_keepidle(options.tcp_keepidle)?;
easy.tcp_keepintvl(options.tcp_keepintvl)?;
if let Some((username, password)) = &options.basic_auth {
easy.username(username)?;
easy.password(password)?;
}
if let Some(proxy_url) = &options.proxy {
easy.proxy(proxy_url)?;
}
if let Some((username, password)) = &options.proxy_auth {
easy.proxy_username(username)?;
easy.proxy_password(password)?;
}
easy.nobody(true)?;
easy.write_function(|data| Ok(data.len()))?;
easy.perform()?;
let file_size = easy
.content_length_download()?
.to_u64()
.ok_or_else(|| std::io::Error::new(ErrorKind::InvalidData, "invalid content length"))?;
if options.check_range {
easy.range("0-0")?;
easy.nobody(true)?;
easy.perform()?;
let head_size = easy.content_length_download()?.to_u64().ok_or_else(|| {
std::io::Error::new(ErrorKind::InvalidData, "can not perform range request")
})?;
if head_size != 1 {
return Err(PartialZipError::RangeNotSupported);
}
if easy.response_code()? != HTTP_PARTIAL_CONTENT {
return Err(PartialZipError::RangeNotSupported);
}
easy.range("")?;
easy.nobody(false)?;
}
Ok(Self {
url: url.to_owned(),
file_size,
easy,
pos: 0,
})
}
#[must_use]
pub fn url(&self) -> &str {
&self.url
}
}
impl io::Read for PartialReader {
    /// Reads up to `buf.len()` bytes starting at the current position by
    /// issuing a single range request, then advances the position by the
    /// number of bytes delivered.
    ///
    /// Returns `Ok(0)` when `buf` is empty or the position is at or beyond the
    /// end of the file.
    ///
    /// # Errors
    /// Fails if the range cannot be computed, the transfer fails, or the
    /// server answers a non-zero-offset range request with HTTP 200 (i.e. it
    /// ignored the range and sent the file from the beginning).
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Status a server sends when it ignores `Range` and returns the whole file.
        const HTTP_OK: u32 = 200;

        log::trace!(
            "read self.pos = {:x} self.file_size = {:x}",
            self.pos,
            self.file_size
        );
        // An empty buffer is a valid request for zero bytes; without this guard
        // the `- 1` below would turn it into a spurious error.
        if buf.is_empty() || self.pos >= self.file_size {
            return Ok(0);
        }
        let start = self.pos;
        let buf_len = buf.len().to_u64().ok_or_else(|| {
            io::Error::new(
                ErrorKind::InvalidData,
                format!("The buf len is invalid {}", buf.len()),
            )
        })?;
        // `buf_len >= 1` here, so the sum is at least 1 and `- 1` cannot underflow.
        let maybe_end = start.checked_add(buf_len).ok_or_else(|| {
            io::Error::new(
                ErrorKind::InvalidData,
                format!("start + buf.len() overflow {start} {}", buf.len()),
            )
        })? - 1;
        log::trace!("maybe_end = {maybe_end:x}");
        // `start < file_size`, so `file_size >= 1` and `end >= start`.
        let end = maybe_end.min(self.file_size - 1);
        log::trace!("end = {end:x} start = {start:x}");
        let range = format!("{start}-{end}");
        log::trace!("range = {range}");
        self.easy.range(&range)?;
        self.easy.get(true)?;
        // Number of bytes requested; never more than `buf.len()`.
        let wanted = (end - start + 1).to_usize().unwrap_or(buf.len());
        let mut content: Vec<u8> = Vec::with_capacity(wanted);
        {
            let mut transfer = self.easy.transfer();
            transfer.write_function(|data| {
                log::trace!("transfered {:x} bytes", data.len());
                // Keep at most the requested amount so an over-long response
                // cannot grow the buffer without bound; the remainder is
                // acknowledged but discarded.
                let room = wanted.saturating_sub(content.len());
                content.extend_from_slice(&data[..room.min(data.len())]);
                Ok(data.len())
            })?;
            transfer.perform()?;
        }
        // A 200 answer carries the file from offset 0. That is only the
        // requested data when the read itself started at offset 0.
        if start != 0 && self.easy.response_code()? == HTTP_OK {
            return Err(io::Error::new(
                ErrorKind::InvalidData,
                format!("server ignored the range request {range}"),
            ));
        }
        let n = io::Read::read(&mut content.as_slice(), buf)?;
        self.pos = self
            .pos
            .checked_add(n.to_u64().ok_or_else(|| {
                io::Error::new(ErrorKind::InvalidData, format!("invalid read amount {n}"))
            })?)
            .ok_or_else(|| {
                io::Error::new(
                    ErrorKind::InvalidData,
                    format!("adding {n} overflows the reader position {}", self.pos),
                )
            })?;
        log::trace!("new self.pos = {:x}", self.pos);
        Ok(n)
    }
}
impl io::Seek for PartialReader {
    /// Moves the read position and returns the new position.
    ///
    /// `SeekFrom::Start` sets the position directly; `SeekFrom::End` and
    /// `SeekFrom::Current` apply a signed offset to the file size or the
    /// current position. No request is made, and positions beyond the end of
    /// the file are accepted.
    ///
    /// # Errors
    /// Returns `ErrorKind::InvalidInput` if the resulting position would be
    /// negative or would overflow `u64`.
    fn seek(&mut self, style: io::SeekFrom) -> io::Result<u64> {
        let (base_pos, offset) = match style {
            io::SeekFrom::Start(n) => {
                self.pos = n;
                return Ok(n);
            }
            io::SeekFrom::End(n) => (self.file_size, n),
            io::SeekFrom::Current(n) => (self.pos, n),
        };
        log::trace!("seek base_pos = {base_pos:x} offset = {offset:x}");
        let new_pos = if offset >= 0 {
            base_pos.checked_add(
                u64::value_from(offset)
                    .map_err(|e| io::Error::new(ErrorKind::InvalidData, e.to_string()))?,
            )
        } else {
            // `unsigned_abs` is exact for every negative value, including
            // `i64::MIN`, whose negation does not fit in an `i64`.
            base_pos.checked_sub(offset.unsigned_abs())
        };
        match new_pos {
            Some(n) => {
                self.pos = n;
                log::trace!("new self.pos = {n:x}");
                Ok(self.pos)
            }
            None => Err(io::Error::new(
                ErrorKind::InvalidInput,
                "invalid seek to a negative or overflowing position",
            )),
        }
    }
}