/// Tunable limits and switches for streaming operations.
///
/// Values can be taken from one of the presets or adjusted with the
/// builder-style setters defined on this type.
#[derive(Debug, Clone)]
pub struct StreamingConfig {
    /// Memory buffer ceiling, in bytes.
    pub max_memory_buffer: usize,
    /// Size of the read buffer, in bytes.
    pub read_buffer_size: usize,
    /// Whether CRC verification is requested.
    pub verify_crc: bool,
    /// Whether progress tracking is requested.
    pub track_progress: bool,
    /// Upper limit on the number of entries.
    pub max_entries: usize,
    /// Upper limit on the compression ratio.
    // NOTE(review): presumably a decompression-bomb guard; enforcement is not
    // visible in this file.
    pub max_compression_ratio: u32,
    /// Decoder pool sizing: `Some(0)` asks for an automatically chosen size,
    /// `Some(n)` requests exactly `n`, and `None` turns the pool off.
    pub decoder_pool_capacity: Option<usize>,
}

impl Default for StreamingConfig {
    /// Builds the baseline configuration: a 64 MiB memory buffer, a 64 KiB
    /// read buffer, CRC verification and progress tracking enabled, up to one
    /// million entries, a ratio limit of 1000, and an auto-sized decoder pool.
    fn default() -> Self {
        const KIB: usize = 1024;
        const MIB: usize = KIB * KIB;

        StreamingConfig {
            max_memory_buffer: 64 * MIB,
            read_buffer_size: 64 * KIB,
            verify_crc: true,
            track_progress: true,
            max_entries: 1_000_000,
            max_compression_ratio: 1000,
            // Zero is the "pick a size automatically" marker.
            decoder_pool_capacity: Some(0),
        }
    }
}
impl StreamingConfig {
    /// Creates a configuration populated with the default settings.
    pub fn new() -> Self {
        Self::default()
    }

    /// Preset with reduced limits: 8 MiB memory buffer, 16 KiB read buffer,
    /// progress tracking off, 100 000 entries, and a two-slot decoder pool.
    pub fn low_memory() -> Self {
        Self {
            max_memory_buffer: 8 * 1024 * 1024,
            read_buffer_size: 16 * 1024,
            verify_crc: true,
            track_progress: false,
            max_entries: 100_000,
            max_compression_ratio: 1000,
            decoder_pool_capacity: Some(2),
        }
    }

    /// Preset with raised limits: 256 MiB memory buffer, 256 KiB read buffer,
    /// ten million entries, a ratio limit of 10 000, and an auto-sized
    /// decoder pool.
    pub fn high_performance() -> Self {
        Self {
            max_memory_buffer: 256 * 1024 * 1024,
            read_buffer_size: 256 * 1024,
            verify_crc: true,
            track_progress: true,
            max_entries: 10_000_000,
            max_compression_ratio: 10000,
            decoder_pool_capacity: Some(0),
        }
    }

    /// Derives buffer sizes from the memory figures reported by `sysinfo`.
    ///
    /// The memory buffer targets the larger of one eighth of available memory
    /// and one sixteenth of total memory, clamped to 32 MiB ..= 1 GiB. The
    /// read buffer is one thousandth of that, clamped to 32 KiB ..= 512 KiB.
    /// All remaining fields match the defaults.
    #[cfg(feature = "sysinfo")]
    pub fn auto_sized() -> Self {
        use sysinfo::System;

        const MIN_BUFFER: u64 = 32 * 1024 * 1024;
        const MAX_BUFFER: u64 = 1024 * 1024 * 1024;

        let mut sys = System::new();
        sys.refresh_memory();
        let total_memory = sys.total_memory();
        let available_memory = sys.available_memory();

        let target = (available_memory / 8).max(total_memory / 16);
        // Clamp while the value is still a u64: narrowing first would wrap on
        // 32-bit targets (e.g. 4 GiB -> 0). After clamping the value is at
        // most 1 GiB, so the cast below is lossless.
        let buffer_size = target.clamp(MIN_BUFFER, MAX_BUFFER) as usize;
        let read_buffer = (buffer_size / 1000).clamp(32 * 1024, 512 * 1024);

        Self {
            max_memory_buffer: buffer_size,
            read_buffer_size: read_buffer,
            verify_crc: true,
            track_progress: true,
            max_entries: 1_000_000,
            max_compression_ratio: 1000,
            decoder_pool_capacity: Some(0),
        }
    }

    /// Fallback used when the `sysinfo` feature is disabled: returns the
    /// default configuration.
    #[cfg(not(feature = "sysinfo"))]
    pub fn auto_sized() -> Self {
        Self::default()
    }

    /// Takes a snapshot of total, available, and used memory via `sysinfo`.
    ///
    /// Always returns `Some` when the `sysinfo` feature is enabled.
    #[cfg(feature = "sysinfo")]
    pub fn system_memory_info() -> Option<SystemMemoryInfo> {
        use sysinfo::System;

        let mut sys = System::new();
        sys.refresh_memory();
        Some(SystemMemoryInfo {
            total: sys.total_memory(),
            available: sys.available_memory(),
            used: sys.used_memory(),
        })
    }

    /// Fallback used when the `sysinfo` feature is disabled: always `None`.
    #[cfg(not(feature = "sysinfo"))]
    pub fn system_memory_info() -> Option<SystemMemoryInfo> {
        None
    }

    /// Sets the memory buffer ceiling in bytes and returns the updated config.
    pub fn max_memory_buffer(mut self, bytes: usize) -> Self {
        self.max_memory_buffer = bytes;
        self
    }

    /// Sets the read buffer size in bytes and returns the updated config.
    pub fn read_buffer_size(mut self, bytes: usize) -> Self {
        self.read_buffer_size = bytes;
        self
    }

    /// Enables or disables CRC verification and returns the updated config.
    pub fn verify_crc(mut self, verify: bool) -> Self {
        self.verify_crc = verify;
        self
    }

    /// Enables or disables progress tracking and returns the updated config.
    pub fn track_progress(mut self, track: bool) -> Self {
        self.track_progress = track;
        self
    }

    /// Sets the entry-count limit and returns the updated config.
    pub fn max_entries(mut self, count: usize) -> Self {
        self.max_entries = count;
        self
    }

    /// Sets the compression-ratio limit and returns the updated config.
    pub fn max_compression_ratio(mut self, ratio: u32) -> Self {
        self.max_compression_ratio = ratio;
        self
    }

    /// Sets the decoder pool capacity and returns the updated config.
    ///
    /// See [`resolved_decoder_pool_capacity`](Self::resolved_decoder_pool_capacity)
    /// for how `Some(0)`, `Some(n)`, and `None` are interpreted.
    pub fn decoder_pool_capacity(mut self, capacity: Option<usize>) -> Self {
        self.decoder_pool_capacity = capacity;
        self
    }

    /// Turns the decoder pool off by storing `None` as its capacity.
    pub fn disable_decoder_pool(mut self) -> Self {
        self.decoder_pool_capacity = None;
        self
    }

    /// Resolves the configured decoder pool capacity to a concrete number.
    ///
    /// * `Some(0)` - the value of [`std::thread::available_parallelism`], or
    ///   `4` when that query fails.
    /// * `Some(n)` - exactly `n`.
    /// * `None` - `0`.
    pub fn resolved_decoder_pool_capacity(&self) -> usize {
        match self.decoder_pool_capacity {
            Some(0) => std::thread::available_parallelism()
                .map(|n| n.get())
                .unwrap_or(4),
            Some(n) => n,
            None => 0,
        }
    }

    /// Checks that the buffer sizes are usable.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidFormat` when `max_memory_buffer` is zero, when
    /// `read_buffer_size` is zero, or when `read_buffer_size` is larger than
    /// `max_memory_buffer`. The checks run in that order and the first
    /// failure is reported.
    pub fn validate(&self) -> crate::Result<()> {
        if self.max_memory_buffer == 0 {
            return Err(crate::Error::InvalidFormat(
                "max_memory_buffer must be greater than 0".into(),
            ));
        }
        if self.read_buffer_size == 0 {
            return Err(crate::Error::InvalidFormat(
                "read_buffer_size must be greater than 0".into(),
            ));
        }
        if self.read_buffer_size > self.max_memory_buffer {
            return Err(crate::Error::InvalidFormat(
                "read_buffer_size cannot exceed max_memory_buffer".into(),
            ));
        }
        Ok(())
    }
}
/// A three-point estimate of memory usage, in bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MemoryEstimate {
    /// Lower bound of the estimate.
    pub minimum: usize,
    /// Expected usage in the common case.
    pub typical: usize,
    /// Upper bound of the estimate.
    pub maximum: usize,
}

impl MemoryEstimate {
    /// Builds an estimate from its three components. The values are stored
    /// as given; no ordering between them is enforced.
    pub fn new(minimum: usize, typical: usize, maximum: usize) -> Self {
        MemoryEstimate {
            minimum,
            typical,
            maximum,
        }
    }

    /// Builds an estimate whose three components all equal `size`.
    pub fn fixed(size: usize) -> Self {
        MemoryEstimate {
            minimum: size,
            typical: size,
            maximum: size,
        }
    }

    /// Returns the component-wise sum of `self` and `other`. Each component
    /// saturates at `usize::MAX` rather than overflowing.
    pub fn add(&self, other: &MemoryEstimate) -> MemoryEstimate {
        let minimum = self.minimum.saturating_add(other.minimum);
        let typical = self.typical.saturating_add(other.typical);
        let maximum = self.maximum.saturating_add(other.maximum);
        Self::new(minimum, typical, maximum)
    }

    /// Renders the estimate as `"<minimum> - <maximum> (typical: <typical>)"`,
    /// with each value formatted by `format_bytes_iec_usize`.
    pub fn format_human(&self) -> String {
        use crate::progress::format_bytes_iec_usize;

        let low = format_bytes_iec_usize(self.minimum);
        let high = format_bytes_iec_usize(self.maximum);
        let usual = format_bytes_iec_usize(self.typical);
        format!("{} - {} (typical: {})", low, high, usual)
    }
}
/// Compression methods for which memory usage can be estimated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionMethod {
    /// Pass-through method; both estimators report zero bytes for it.
    Copy,
    /// LZMA; estimates scale with the dictionary size.
    Lzma,
    /// LZMA2; estimated exactly like [`CompressionMethod::Lzma`].
    Lzma2,
    /// Deflate.
    Deflate,
    /// Bzip2.
    Bzip2,
    /// PPMd.
    Ppmd,
    /// LZ4.
    Lz4,
    /// Zstandard.
    Zstd,
    /// Brotli.
    Brotli,
}

impl CompressionMethod {
    /// Estimates the memory, in bytes, a decoder for this method needs.
    ///
    /// `dict_size` is consulted only for LZMA/LZMA2, where it defaults to
    /// 8 MiB when `None`; every other method uses fixed figures. Sums
    /// saturate at `usize::MAX` instead of overflowing.
    pub fn estimate_decoder_memory(&self, dict_size: Option<u32>) -> MemoryEstimate {
        const KIB: usize = 1024;
        const MIB: usize = 1024 * KIB;

        match self {
            Self::Copy => MemoryEstimate::fixed(0),
            Self::Lzma | Self::Lzma2 => {
                let dict = dict_size.unwrap_or(8 * 1024 * 1024) as usize;
                // Dictionary plus a fixed 20 KiB of decoder state is the floor.
                let floor = dict.saturating_add(20 * KIB);
                MemoryEstimate::new(
                    floor,
                    floor.saturating_add(64 * KIB),
                    floor.saturating_add(256 * KIB),
                )
            }
            Self::Deflate => MemoryEstimate::new(32 * KIB + 8 * KIB, 64 * KIB, 128 * KIB),
            Self::Bzip2 => MemoryEstimate::new(100 * KIB, 400 * KIB, 900 * KIB),
            Self::Ppmd => MemoryEstimate::new(MIB, 16 * MIB, 256 * MIB),
            Self::Lz4 => MemoryEstimate::new(16 * KIB, 64 * KIB, 256 * KIB),
            Self::Zstd => MemoryEstimate::new(128 * KIB, MIB, 128 * MIB),
            Self::Brotli => MemoryEstimate::new(256 * KIB, 4 * MIB, 16 * MIB),
        }
    }

    /// Estimates the memory, in bytes, an encoder for this method needs at
    /// the given compression `level`.
    ///
    /// * `dict_size` is consulted only for LZMA/LZMA2; when `None`, a
    ///   level-dependent default dictionary size is used.
    /// * `level` is capped at 9 for every method except Zstd, where levels
    ///   above 9 select the largest (128 MiB base) tier.
    /// * PPMd reuses the decoder estimate.
    ///
    /// The returned estimate always satisfies
    /// `minimum <= typical <= maximum`.
    pub fn estimate_encoder_memory(&self, dict_size: Option<u32>, level: u32) -> MemoryEstimate {
        const KIB: usize = 1024;
        const MIB: usize = 1024 * KIB;

        // Keep the caller's value: the Zstd tiers extend past level 9, and
        // capping it up front would make the top tier unreachable.
        let requested_level = level;
        let level = level.min(9);

        match self {
            Self::Copy => MemoryEstimate::fixed(0),
            Self::Lzma | Self::Lzma2 => {
                let dict =
                    dict_size.unwrap_or_else(|| Self::lzma_default_dict_size(level)) as usize;
                // Saturating arithmetic: a u32 dictionary times four can
                // exceed `usize` on 32-bit targets.
                let hash_tables = dict.saturating_mul(4);
                let base = dict.saturating_add(hash_tables).saturating_add(64 * KIB);
                let level_factor = 1.0 + (level as f64 * 0.1);
                let typical = (base as f64 * level_factor) as usize;
                // The level factor reaches 1.9 at level 9, so the 1.5x bound
                // alone would fall below `typical` for levels 6 and above.
                let maximum = ((base as f64 * 1.5) as usize).max(typical);
                MemoryEstimate::new(base, typical, maximum)
            }
            Self::Deflate => {
                let base = match level {
                    0..=3 => 128 * KIB,
                    4..=6 => 256 * KIB,
                    _ => 512 * KIB,
                };
                MemoryEstimate::new(base, base + 64 * KIB, base * 2)
            }
            Self::Bzip2 => {
                // Level 0 is treated like level 1 (one 100 KiB block unit).
                let block_size = (level.max(1) * 100 * 1024) as usize;
                let encoder_mem = block_size * 8;
                MemoryEstimate::new(encoder_mem, encoder_mem + 100 * KIB, encoder_mem * 2)
            }
            Self::Ppmd => self.estimate_decoder_memory(None),
            Self::Lz4 => {
                let base = match level {
                    0..=3 => 16 * KIB,
                    4..=6 => 64 * KIB,
                    _ => 256 * KIB,
                };
                MemoryEstimate::new(base, base * 2, base * 4)
            }
            Self::Zstd => {
                let base = match requested_level {
                    0..=3 => MIB,
                    4..=6 => 8 * MIB,
                    7..=9 => 64 * MIB,
                    _ => 128 * MIB,
                };
                MemoryEstimate::new(base / 2, base, base * 2)
            }
            Self::Brotli => {
                let base = match level {
                    0..=4 => MIB,
                    5..=7 => 4 * MIB,
                    _ => 16 * MIB,
                };
                MemoryEstimate::new(base / 2, base, base * 2)
            }
        }
    }

    /// Dictionary size, in bytes, assumed for an LZMA encoder at `level`
    /// when the caller does not supply one. Levels of 9 and above map to
    /// 64 MiB.
    fn lzma_default_dict_size(level: u32) -> u32 {
        match level {
            0 => 64 * 1024,
            1 => 256 * 1024,
            2 => 1024 * 1024,
            3 => 2 * 1024 * 1024,
            4 => 4 * 1024 * 1024,
            5 => 8 * 1024 * 1024,
            6 => 8 * 1024 * 1024,
            7 => 16 * 1024 * 1024,
            8 => 32 * 1024 * 1024,
            _ => 64 * 1024 * 1024,
        }
    }
}
impl From<crate::codec::CodecMethod> for CompressionMethod {
fn from(method: crate::codec::CodecMethod) -> Self {
match method {
crate::codec::CodecMethod::Copy => Self::Copy,
crate::codec::CodecMethod::Lzma => Self::Lzma,
crate::codec::CodecMethod::Lzma2 => Self::Lzma2,
crate::codec::CodecMethod::Deflate => Self::Deflate,
crate::codec::CodecMethod::BZip2 => Self::Bzip2,
crate::codec::CodecMethod::PPMd => Self::Ppmd,
crate::codec::CodecMethod::Lz4 => Self::Lz4,
crate::codec::CodecMethod::Zstd => Self::Zstd,
crate::codec::CodecMethod::Brotli => Self::Brotli,
}
}
}
impl StreamingConfig {
    /// Estimates memory for this configuration combined with a decoder for
    /// `method`.
    ///
    /// The buffer share is a fixed `max_memory_buffer + read_buffer_size`;
    /// the decoder share comes from
    /// [`CompressionMethod::estimate_decoder_memory`], to which `dict_size`
    /// is forwarded. All sums saturate at `usize::MAX`.
    pub fn estimate_memory(
        &self,
        method: CompressionMethod,
        dict_size: Option<u32>,
    ) -> MemoryEstimate {
        // Both sizes are caller-controlled, so a plain `+` could overflow.
        let buffer_total = self.max_memory_buffer.saturating_add(self.read_buffer_size);
        let buffer_mem = MemoryEstimate::fixed(buffer_total);
        let decoder_mem = method.estimate_decoder_memory(dict_size);
        buffer_mem.add(&decoder_mem)
    }

    /// Estimates extraction memory assuming an LZMA2 decoder with a 16 MiB
    /// dictionary.
    ///
    /// The buffer share ranges from `read_buffer_size` (minimum) through
    /// `read_buffer_size + 64 KiB` (typical) up to `max_memory_buffer`
    /// (maximum). The maximum is raised to the typical value when
    /// `max_memory_buffer` is smaller than it, so the result always satisfies
    /// `minimum <= typical <= maximum`.
    pub fn estimate_extraction_memory(&self) -> MemoryEstimate {
        let decoder = CompressionMethod::Lzma2.estimate_decoder_memory(Some(16 * 1024 * 1024));
        let typical_buffers = self.read_buffer_size.saturating_add(64 * 1024);
        let buffers = MemoryEstimate::new(
            self.read_buffer_size,
            typical_buffers,
            self.max_memory_buffer.max(typical_buffers),
        );
        decoder.add(&buffers)
    }
}
/// Snapshot of system memory figures.
///
/// `format_human` renders the values with a byte formatter, so they are
/// treated as byte counts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SystemMemoryInfo {
    /// Total memory.
    pub total: u64,
    /// Memory currently available.
    pub available: u64,
    /// Memory currently in use.
    pub used: u64,
}

impl SystemMemoryInfo {
    /// Returns `used / total` as a percentage, or `0.0` when `total` is zero.
    pub fn usage_percent(&self) -> f64 {
        if self.total == 0 {
            0.0
        } else {
            (self.used as f64 / self.total as f64) * 100.0
        }
    }

    /// Renders the snapshot as
    /// `"Total: <total>, Available: <available>, Used: <used> (<percent>%)"`,
    /// with the percentage shown to one decimal place.
    pub fn format_human(&self) -> String {
        use crate::progress::format_bytes_iec_usize;

        // Saturate rather than wrap when `usize` is narrower than `u64`: a
        // bare `as usize` would turn e.g. 4 GiB into 0 on 32-bit targets.
        // NOTE(review): a u64-based formatter, if the progress module has
        // one, would avoid the narrowing entirely - confirm.
        let to_usize = |bytes: u64| bytes.min(usize::MAX as u64) as usize;

        format!(
            "Total: {}, Available: {}, Used: {} ({:.1}%)",
            format_bytes_iec_usize(to_usize(self.total)),
            format_bytes_iec_usize(to_usize(self.available)),
            format_bytes_iec_usize(to_usize(self.used)),
            self.usage_percent()
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- MemoryEstimate ----

    /// Adding two estimates sums each component independently.
    #[test]
    fn test_memory_estimate_add() {
        let lhs = MemoryEstimate::new(100, 200, 300);
        let rhs = MemoryEstimate::new(10, 20, 30);
        let MemoryEstimate {
            minimum,
            typical,
            maximum,
        } = lhs.add(&rhs);
        assert_eq!(minimum, 110);
        assert_eq!(typical, 220);
        assert_eq!(maximum, 330);
    }

    /// A fixed estimate carries the same value in all three components.
    #[test]
    fn test_memory_estimate_fixed() {
        let MemoryEstimate {
            minimum,
            typical,
            maximum,
        } = MemoryEstimate::fixed(1024);
        assert_eq!(minimum, 1024);
        assert_eq!(typical, 1024);
        assert_eq!(maximum, 1024);
    }

    /// Spot-checks the IEC byte formatter at each unit boundary.
    #[test]
    fn test_format_bytes() {
        use crate::progress::format_bytes_iec_usize;

        let b = format_bytes_iec_usize(512);
        let kib = format_bytes_iec_usize(1024);
        let mib = format_bytes_iec_usize(1024 * 1024);
        let gib = format_bytes_iec_usize(1024 * 1024 * 1024);
        assert_eq!(b, "512 B");
        assert_eq!(kib, "1.0 KiB");
        assert_eq!(mib, "1.0 MiB");
        assert_eq!(gib, "1.0 GiB");
    }

    // ---- Decoder / config estimates ----

    /// The LZMA2 decoder estimate is never below the dictionary size.
    #[test]
    fn test_lzma_decoder_memory() {
        let dict_bytes = 8 * 1024 * 1024;
        let lzma2 = CompressionMethod::Lzma2.estimate_decoder_memory(Some(dict_bytes));
        assert!(lzma2.minimum >= 8 * 1024 * 1024);
    }

    /// The combined estimate is non-zero and ordered min <= typical <= max.
    #[test]
    fn test_config_estimate_memory() {
        let MemoryEstimate {
            minimum,
            typical,
            maximum,
        } = StreamingConfig::default().estimate_memory(CompressionMethod::Lzma2, None);
        assert!(minimum > 0);
        assert!(typical >= minimum);
        assert!(maximum >= typical);
    }

    // ---- Presets and builders ----

    #[test]
    fn test_default_config() {
        let defaults = StreamingConfig::default();
        assert_eq!(defaults.max_memory_buffer, 64 * 1024 * 1024);
        assert_eq!(defaults.read_buffer_size, 64 * 1024);
        assert!(defaults.verify_crc);
        assert!(defaults.track_progress);
    }

    #[test]
    fn test_low_memory_config() {
        let low = StreamingConfig::low_memory();
        let baseline = StreamingConfig::default();
        assert_eq!(low.max_memory_buffer, 8 * 1024 * 1024);
        assert!(low.max_memory_buffer < baseline.max_memory_buffer);
    }

    #[test]
    fn test_high_performance_config() {
        let fast = StreamingConfig::high_performance();
        let baseline = StreamingConfig::default();
        assert_eq!(fast.max_memory_buffer, 256 * 1024 * 1024);
        assert!(fast.max_memory_buffer > baseline.max_memory_buffer);
    }

    /// Each chained setter overrides exactly its own field.
    #[test]
    fn test_builder_pattern() {
        let built = StreamingConfig::new()
            .max_memory_buffer(16 * 1024 * 1024)
            .read_buffer_size(32 * 1024)
            .verify_crc(false)
            .track_progress(false);

        assert_eq!(built.max_memory_buffer, 16 * 1024 * 1024);
        assert_eq!(built.read_buffer_size, 32 * 1024);
        assert!(!built.verify_crc);
        assert!(!built.track_progress);
    }

    // ---- Validation ----

    #[test]
    fn test_validation_success() {
        assert!(StreamingConfig::default().validate().is_ok());
    }

    #[test]
    fn test_validation_zero_memory_buffer() {
        let outcome = StreamingConfig::new().max_memory_buffer(0).validate();
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validation_zero_read_buffer() {
        let outcome = StreamingConfig::new().read_buffer_size(0).validate();
        assert!(outcome.is_err());
    }

    #[test]
    fn test_validation_read_buffer_exceeds_max() {
        let outcome = StreamingConfig::new()
            .max_memory_buffer(1024)
            .read_buffer_size(2048)
            .validate();
        assert!(outcome.is_err());
    }

    // ---- Decoder pool ----

    /// The default stores the auto marker and resolves to at least one slot.
    #[test]
    fn test_decoder_pool_capacity_default() {
        let defaults = StreamingConfig::default();
        assert_eq!(defaults.decoder_pool_capacity, Some(0));
        assert!(defaults.resolved_decoder_pool_capacity() >= 1);
    }

    #[test]
    fn test_decoder_pool_capacity_explicit() {
        let resolved = StreamingConfig::new()
            .decoder_pool_capacity(Some(8))
            .resolved_decoder_pool_capacity();
        assert_eq!(resolved, 8);
    }

    #[test]
    fn test_decoder_pool_capacity_disabled() {
        let disabled = StreamingConfig::new().disable_decoder_pool();
        assert_eq!(disabled.decoder_pool_capacity, None);
        assert_eq!(disabled.resolved_decoder_pool_capacity(), 0);
    }

    #[test]
    fn test_low_memory_decoder_pool() {
        let low = StreamingConfig::low_memory();
        assert_eq!(low.decoder_pool_capacity, Some(2));
        assert_eq!(low.resolved_decoder_pool_capacity(), 2);
    }

    // ---- System memory ----

    /// Auto-sizing stays inside its documented clamps and validates.
    #[test]
    fn test_auto_sized() {
        let sized = StreamingConfig::auto_sized();
        assert!(sized.max_memory_buffer >= 32 * 1024 * 1024);
        assert!(sized.max_memory_buffer <= 1024 * 1024 * 1024);
        assert!(sized.read_buffer_size >= 32 * 1024);
        assert!(sized.read_buffer_size <= 512 * 1024);
        assert!(sized.validate().is_ok());
    }

    /// Memory info is present exactly when the `sysinfo` feature is enabled.
    #[test]
    fn test_system_memory_info() {
        let snapshot = StreamingConfig::system_memory_info();

        #[cfg(feature = "sysinfo")]
        {
            let info = snapshot.expect("sysinfo feature enabled but no memory info");
            assert!(info.total > 0, "Total memory should be > 0");
            assert!(info.used <= info.total, "Used should be <= total");
        }

        #[cfg(not(feature = "sysinfo"))]
        {
            assert!(snapshot.is_none(), "Without sysinfo, should return None");
        }
    }

    #[test]
    fn test_system_memory_info_usage_percent() {
        const GIB: u64 = 1024 * 1024 * 1024;
        let half_used = SystemMemoryInfo {
            total: 16 * GIB,
            available: 8 * GIB,
            used: 8 * GIB,
        };
        let deviation = (half_used.usage_percent() - 50.0).abs();
        assert!(deviation < 0.001);
    }

    /// A zero total must not divide by zero; usage is reported as 0%.
    #[test]
    fn test_system_memory_info_zero_total() {
        let empty = SystemMemoryInfo {
            total: 0,
            available: 0,
            used: 0,
        };
        let deviation = (empty.usage_percent() - 0.0).abs();
        assert!(deviation < f64::EPSILON);
    }
}