/// Sizing recommendation for a pair of buffer pools (string buffers and byte
/// buffers), as produced by `estimate_pool_size`.
#[derive(Debug, Clone, Copy)]
pub struct PoolSizeHint {
    /// Estimated total content size, derived from the input file size scaled
    /// by a per-format ratio (presumably bytes; confirm against callers).
    pub estimated_total_size: usize,
    /// Recommended number of string buffers in the pool.
    pub string_buffer_count: usize,
    /// Recommended capacity of each individual string buffer.
    pub string_buffer_capacity: usize,
    /// Recommended number of byte buffers in the pool.
    pub byte_buffer_count: usize,
    /// Recommended capacity of each individual byte buffer.
    pub byte_buffer_capacity: usize,
}

impl PoolSizeHint {
    /// Memory the string pool would occupy if every buffer were allocated at
    /// full capacity (`string_buffer_count * string_buffer_capacity`).
    ///
    /// The product saturates at `usize::MAX` instead of overflowing, because
    /// the fields are public and may hold arbitrary values.
    #[inline]
    pub fn estimated_string_pool_memory(&self) -> usize {
        self.string_buffer_count
            .saturating_mul(self.string_buffer_capacity)
    }

    /// Memory the byte pool would occupy if every buffer were allocated at
    /// full capacity (`byte_buffer_count * byte_buffer_capacity`).
    ///
    /// The product saturates at `usize::MAX` instead of overflowing.
    #[inline]
    pub fn estimated_byte_pool_memory(&self) -> usize {
        self.byte_buffer_count
            .saturating_mul(self.byte_buffer_capacity)
    }

    /// Combined memory estimate of the string pool and the byte pool.
    ///
    /// The sum saturates at `usize::MAX` instead of overflowing.
    #[inline]
    pub fn total_pool_memory(&self) -> usize {
        self.estimated_string_pool_memory()
            .saturating_add(self.estimated_byte_pool_memory())
    }
}
/// Returns the multiplier that `estimate_pool_size` applies to a file's size
/// to obtain its `estimated_total_size`, chosen by MIME type.
///
/// The lookup uses only the `type/subtype` part of `mime_type`: any
/// parameters after the first `;` (for example `; charset=utf-8`) and any
/// surrounding whitespace are ignored, and the comparison is ASCII
/// case-insensitive. Unrecognised types yield `0.50`.
#[inline]
fn get_format_ratio(mime_type: &str) -> f64 {
    /// Table lookup on an already normalised (lower-case, parameter-free) type.
    fn ratio_for(essence: &str) -> f64 {
        match essence {
            "text/plain" | "text/markdown" | "text/x-markdown" => 0.95,
            "text/csv" | "text/tab-separated-values" => 0.90,
            "text/html" => 0.65,
            "application/xml" | "text/xml" => 0.60,
            "image/svg+xml" => 0.55,
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            | "application/vnd.openxmlformats-officedocument.wordprocessingml.macro-enabled.document"
            | "application/msword" => 0.45,
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            | "application/vnd.openxmlformats-officedocument.spreadsheetml.macro-enabled.sheet"
            | "application/vnd.ms-excel" => 0.40,
            "application/vnd.openxmlformats-officedocument.presentationml.presentation"
            | "application/vnd.openxmlformats-officedocument.presentationml.macro-enabled.presentation"
            | "application/vnd.ms-powerpoint" => 0.35,
            "application/vnd.oasis.opendocument.text" => 0.45,
            "application/vnd.oasis.opendocument.spreadsheet" => 0.40,
            "application/vnd.oasis.opendocument.presentation" => 0.35,
            "application/pdf" => 0.25,
            "application/json" | "text/json" => 0.80,
            "application/x-yaml" | "text/yaml" | "text/x-yaml" | "application/yaml" => 0.85,
            "application/zip" | "application/x-zip-compressed" => 0.30,
            "application/gzip" | "application/x-gzip" => 0.25,
            "application/x-rar-compressed" => 0.30,
            "application/x-7z-compressed" => 0.25,
            _ => 0.50,
        }
    }

    // Drop parameters such as `; charset=...` so that every parameterised
    // variant of a known type resolves to the same entry.
    let essence = mime_type.split(';').next().unwrap_or(mime_type).trim();

    // Only allocate a lower-cased copy when the input actually needs it.
    if essence.bytes().any(|b| b.is_ascii_uppercase()) {
        ratio_for(&essence.to_ascii_lowercase())
    } else {
        ratio_for(essence)
    }
}
/// Returns the per-format baseline `(buffer count, buffer capacity)` pair for
/// a MIME type.
///
/// The lookup uses only the `type/subtype` part of `mime_type`: any
/// parameters after the first `;` (for example `; charset=utf-8`) and any
/// surrounding whitespace are ignored, and the comparison is ASCII
/// case-insensitive. Unrecognised types yield `(3, 8192)`.
#[inline]
fn get_format_base_config(mime_type: &str) -> (usize, usize) {
    /// Table lookup on an already normalised (lower-case, parameter-free) type.
    fn config_for(essence: &str) -> (usize, usize) {
        match essence {
            "text/plain" | "text/markdown" | "text/x-markdown" => (2, 4096),
            "text/csv" | "text/tab-separated-values" => (3, 8192),
            "text/html" => (8, 16384),
            "application/xml" | "text/xml" => (5, 8192),
            "image/svg+xml" => (4, 8192),
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            | "application/vnd.openxmlformats-officedocument.wordprocessingml.macro-enabled.document"
            | "application/msword" => (5, 8192),
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            | "application/vnd.openxmlformats-officedocument.spreadsheetml.macro-enabled.sheet"
            | "application/vnd.ms-excel" => (4, 8192),
            "application/vnd.openxmlformats-officedocument.presentationml.presentation"
            | "application/vnd.openxmlformats-officedocument.presentationml.macro-enabled.presentation"
            | "application/vnd.ms-powerpoint" => (4, 8192),
            "application/vnd.oasis.opendocument.text" => (5, 8192),
            "application/vnd.oasis.opendocument.spreadsheet" => (4, 8192),
            "application/vnd.oasis.opendocument.presentation" => (4, 8192),
            "application/pdf" => (6, 16384),
            "application/json" | "text/json" => (4, 8192),
            "application/x-yaml" | "text/yaml" | "text/x-yaml" | "application/yaml" => (4, 8192),
            _ => (3, 8192),
        }
    }

    // Drop parameters such as `; charset=...` so that every parameterised
    // variant of a known type resolves to the same entry.
    let essence = mime_type.split(';').next().unwrap_or(mime_type).trim();

    // Only allocate a lower-cased copy when the input actually needs it.
    if essence.bytes().any(|b| b.is_ascii_uppercase()) {
        config_for(&essence.to_ascii_lowercase())
    } else {
        config_for(essence)
    }
}
/// Scales a baseline buffer count by file size.
///
/// Files up to 100 000 keep `base_count`; up to 1 000 000 get two extra
/// buffers; up to 10 000 000 get four; anything larger gets six. The addition
/// saturates at `usize::MAX`.
#[inline]
fn adjust_for_file_size(file_size: u64, base_count: usize) -> usize {
    // Pick the size-tier bonus first, then apply it in one place.
    let extra_buffers: usize = if file_size <= 100_000 {
        0
    } else if file_size <= 1_000_000 {
        2
    } else if file_size <= 10_000_000 {
        4
    } else {
        6
    };

    base_count.saturating_add(extra_buffers)
}
/// Picks a per-buffer capacity from the file size.
///
/// The result is the capacity of the first tier whose inclusive upper bound
/// is at least `file_size`; files above the last tier get `262144`.
#[inline]
fn estimate_buffer_capacity(file_size: u64) -> usize {
    // (inclusive upper bound on file size, capacity), in ascending order.
    const TIERS: [(u64, usize); 4] = [
        (10_000, 1_024),
        (100_000, 4_096),
        (1_000_000, 16_384),
        (10_000_000, 65_536),
    ];
    // Capacity for anything larger than the last tier.
    const LARGEST_CAPACITY: usize = 262_144;

    TIERS
        .iter()
        .find(|&&(upper_bound, _)| file_size <= upper_bound)
        .map_or(LARGEST_CAPACITY, |&(_, capacity)| capacity)
}
/// Builds a [`PoolSizeHint`] for a file of `file_size` with the given
/// `mime_type`.
///
/// * `estimated_total_size` is `file_size` multiplied by the format ratio,
///   rounded up.
/// * The string buffer count is the format's baseline count adjusted for
///   file size; the string buffer capacity depends on file size only.
/// * The byte pool gets half as many buffers (at least one), each eight
///   times the string buffer capacity.
#[inline]
pub fn estimate_pool_size(file_size: u64, mime_type: &str) -> PoolSizeHint {
    // Only the baseline count is used; the format's baseline capacity is
    // superseded by the file-size-driven capacity below.
    let (format_base_count, _) = get_format_base_config(mime_type);
    let string_buffer_count = adjust_for_file_size(file_size, format_base_count);
    let string_buffer_capacity = estimate_buffer_capacity(file_size);

    let projected_size = file_size as f64 * get_format_ratio(mime_type);

    PoolSizeHint {
        estimated_total_size: projected_size.ceil() as usize,
        string_buffer_count,
        string_buffer_capacity,
        byte_buffer_count: std::cmp::max(string_buffer_count / 2, 1),
        byte_buffer_capacity: string_buffer_capacity * 8,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// PDF maps to the 0.25 ratio.
    #[test]
    fn test_format_ratio_pdf() {
        assert_eq!(get_format_ratio("application/pdf"), 0.25);
    }

    /// HTML maps to the 0.65 ratio.
    #[test]
    fn test_format_ratio_html() {
        assert_eq!(get_format_ratio("text/html"), 0.65);
    }

    /// DOCX maps to the 0.45 ratio.
    #[test]
    fn test_format_ratio_docx() {
        let docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        assert_eq!(get_format_ratio(docx), 0.45);
    }

    /// Unknown MIME types fall back to the 0.50 ratio.
    #[test]
    fn test_format_ratio_default() {
        assert_eq!(get_format_ratio("application/unknown-format"), 0.50);
    }

    /// A 5 KB PDF keeps the PDF baseline count and the smallest capacity.
    #[test]
    fn test_small_file_sizing() {
        let small = estimate_pool_size(5_000, "application/pdf");
        assert_eq!(small.string_buffer_count, 6);
        assert_eq!(small.string_buffer_capacity, 1_024);
    }

    /// A 500 KB PDF gets two extra buffers and 16 KiB capacity.
    #[test]
    fn test_medium_file_sizing() {
        let medium = estimate_pool_size(500_000, "application/pdf");
        assert_eq!(medium.string_buffer_count, 8);
        assert_eq!(medium.string_buffer_capacity, 16_384);
    }

    /// A 5 MB PDF gets four extra buffers and 64 KiB capacity.
    #[test]
    fn test_large_file_sizing() {
        let large = estimate_pool_size(5_000_000, "application/pdf");
        assert_eq!(large.string_buffer_count, 10);
        assert_eq!(large.string_buffer_capacity, 65_536);
    }

    /// A 50 MB PDF gets six extra buffers and 256 KiB capacity.
    #[test]
    fn test_huge_file_sizing() {
        let huge = estimate_pool_size(50_000_000, "application/pdf");
        assert_eq!(huge.string_buffer_count, 12);
        assert_eq!(huge.string_buffer_capacity, 262_144);
    }

    /// HTML at exactly 1 MB sits on the inclusive upper edge of the second tier.
    #[test]
    fn test_html_sizing() {
        let html = estimate_pool_size(1_000_000, "text/html");
        assert_eq!(html.string_buffer_count, 10);
        assert_eq!(html.string_buffer_capacity, 16_384);
        assert_eq!(html.estimated_total_size, 650_000);
    }

    /// Plain text uses the small baseline count and the 0.95 ratio.
    #[test]
    fn test_text_sizing() {
        let text = estimate_pool_size(1_000_000, "text/plain");
        assert_eq!(text.string_buffer_count, 4);
        assert_eq!(text.estimated_total_size, 950_000);
    }

    /// The byte pool has fewer, larger buffers than the string pool.
    #[test]
    fn test_byte_buffer_sizing() {
        let sized = estimate_pool_size(5_000_000, "application/pdf");
        assert!(sized.byte_buffer_count < sized.string_buffer_count);
        assert_eq!(sized.byte_buffer_capacity, sized.string_buffer_capacity * 8);
    }

    /// The total size estimate is the file size scaled by the format ratio.
    #[test]
    fn test_total_size_estimation() {
        let pdf = estimate_pool_size(10_000_000, "application/pdf");
        assert_eq!(pdf.estimated_total_size, 2_500_000);
    }

    /// XLSX combines the spreadsheet ratio with the large-file adjustment.
    #[test]
    fn test_xlsx_sizing() {
        let xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
        let xlsx = estimate_pool_size(2_000_000, xlsx_mime);
        assert_eq!(xlsx.estimated_total_size, 800_000);
        assert_eq!(xlsx.string_buffer_count, 8);
    }
}