use crate::error::Error;
/// Tunable settings for a compaction run.
///
/// All fields are private: values are set through the `with_*` builder methods, which validate
/// their argument against the values already stored, and are read back through the getters of
/// the same name as the field.
///
/// `PartialEq`/`Eq` are derived so that two option sets can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompactOptions {
    /// Target file size in bytes. The builder requires at least 1024 bytes and a value strictly
    /// greater than `max_input_file_size`.
    target_file_size: u64,
    /// Input file size limit in bytes. The builder keeps it non-zero and strictly below
    /// `target_file_size`.
    // NOTE(review): presumably files above this size are not picked as compaction inputs —
    // confirm against the code that consumes these options.
    max_input_file_size: u64,
    /// Minimum number of files per compaction group. The builder rejects values below 2.
    min_files_per_group: usize,
    /// Byte budget for one compaction group. Its builder method requires it to be at least
    /// `target_file_size` at the time it is set.
    max_compaction_group_bytes: u64,
    /// Optional partition filter; `None` when no filter has been set.
    // NOTE(review): the expected filter syntax is not visible in this file.
    partition_filter: Option<String>,
    /// Dry-run flag.
    // NOTE(review): presumably plans the compaction without writing — confirm with the consumer.
    dry_run: bool,
    /// Flag controlling whether partial failure is allowed.
    allow_partial_failure: bool,
}
impl Default for CompactOptions {
    /// Builds the default options: a 256 MiB target file size, a 128 MiB input file size
    /// limit, at least 3 files per group, a 512 MiB group byte budget, no partition filter,
    /// and both boolean flags switched off.
    fn default() -> Self {
        // One mebibyte, so the size defaults read as plain MiB counts.
        const MIB: u64 = 1024 * 1024;

        Self {
            target_file_size: 256 * MIB,
            max_input_file_size: 128 * MIB,
            min_files_per_group: 3,
            max_compaction_group_bytes: 512 * MIB,
            partition_filter: None,
            dry_run: false,
            allow_partial_failure: false,
        }
    }
}
impl CompactOptions {
    /// Smallest `target_file_size` the builder accepts, in bytes.
    const MIN_TARGET_FILE_SIZE: u64 = 1024;

    /// Creates a set of options holding the `Default` values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the target file size, in bytes.
    ///
    /// The value is validated against the limits currently stored in `self`, so the order of
    /// builder calls matters: lower `max_input_file_size` before lowering the target, and raise
    /// `max_compaction_group_bytes` before raising the target above it.
    ///
    /// # Errors
    ///
    /// Returns the error built by `Error::invalid_input` when `size` is zero, is below 1024
    /// bytes, is not strictly greater than `max_input_file_size`, or exceeds
    /// `max_compaction_group_bytes`.
    pub fn with_target_file_size(mut self, size: u64) -> crate::error::Result<Self> {
        // Zero is reported separately so the caller gets the more specific message.
        if size == 0 {
            return Err(Error::invalid_input(
                "target_file_size must be greater than 0",
            ));
        }
        if size < Self::MIN_TARGET_FILE_SIZE {
            return Err(Error::invalid_input(format!(
                "target_file_size must be at least {} bytes (1KB), got {}",
                Self::MIN_TARGET_FILE_SIZE,
                size
            )));
        }
        if size <= self.max_input_file_size {
            return Err(Error::invalid_input(format!(
                "target_file_size ({}) must be greater than max_input_file_size ({})",
                size, self.max_input_file_size
            )));
        }
        // Mirror the check in `with_max_compaction_group_bytes`; without it the target could be
        // raised above the group budget after the budget had been validated.
        if size > self.max_compaction_group_bytes {
            return Err(Error::invalid_input(format!(
                "target_file_size ({}) must not exceed max_compaction_group_bytes ({})",
                size, self.max_compaction_group_bytes
            )));
        }
        self.target_file_size = size;
        Ok(self)
    }

    /// Sets the input file size limit, in bytes.
    ///
    /// # Errors
    ///
    /// Returns the error built by `Error::invalid_input` when `size` is zero or is not strictly
    /// less than the current `target_file_size`.
    pub fn with_max_input_file_size(mut self, size: u64) -> crate::error::Result<Self> {
        if size == 0 {
            return Err(Error::invalid_input(
                "max_input_file_size must be greater than 0",
            ));
        }
        if size >= self.target_file_size {
            return Err(Error::invalid_input(format!(
                "max_input_file_size ({}) must be less than target_file_size ({})",
                size, self.target_file_size
            )));
        }
        self.max_input_file_size = size;
        Ok(self)
    }

    /// Sets the minimum number of files per compaction group.
    ///
    /// # Errors
    ///
    /// Returns the error built by `Error::invalid_input` when `count` is below 2.
    pub fn with_min_files_per_group(mut self, count: usize) -> crate::error::Result<Self> {
        if count < 2 {
            return Err(Error::invalid_input(format!(
                "min_files_per_group must be at least 2 (cannot compact fewer than 2 files), got {}",
                count
            )));
        }
        self.min_files_per_group = count;
        Ok(self)
    }

    /// Stores `partition` as the partition filter, replacing any previous one.
    pub fn with_partition_filter(mut self, partition: String) -> Self {
        self.partition_filter = Some(partition);
        self
    }

    /// Sets the dry-run flag.
    pub fn with_dry_run(mut self, dry_run: bool) -> Self {
        self.dry_run = dry_run;
        self
    }

    /// Sets the flag controlling whether partial failure is allowed.
    pub fn with_allow_partial_failure(mut self, allow: bool) -> Self {
        self.allow_partial_failure = allow;
        self
    }

    /// Sets the byte budget for one compaction group.
    ///
    /// # Errors
    ///
    /// Returns the error built by `Error::invalid_input` when `bytes` is below the current
    /// `target_file_size`.
    pub fn with_max_compaction_group_bytes(mut self, bytes: u64) -> crate::error::Result<Self> {
        if bytes < self.target_file_size {
            return Err(Error::invalid_input(format!(
                "max_compaction_group_bytes ({}) must be at least target_file_size ({})",
                bytes, self.target_file_size
            )));
        }
        self.max_compaction_group_bytes = bytes;
        Ok(self)
    }

    /// Returns the target file size in bytes.
    pub fn target_file_size(&self) -> u64 {
        self.target_file_size
    }

    /// Returns the input file size limit in bytes.
    pub fn max_input_file_size(&self) -> u64 {
        self.max_input_file_size
    }

    /// Returns the minimum number of files per compaction group.
    pub fn min_files_per_group(&self) -> usize {
        self.min_files_per_group
    }

    /// Returns the byte budget for one compaction group.
    pub fn max_compaction_group_bytes(&self) -> u64 {
        self.max_compaction_group_bytes
    }

    /// Returns the partition filter, or `None` when none has been set.
    pub fn partition_filter(&self) -> Option<&str> {
        self.partition_filter.as_deref()
    }

    /// Returns the dry-run flag.
    pub fn dry_run(&self) -> bool {
        self.dry_run
    }

    /// Returns the flag controlling whether partial failure is allowed.
    pub fn allow_partial_failure(&self) -> bool {
        self.allow_partial_failure
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One mebibyte; keeps the byte sizes below readable.
    const MIB: u64 = 1024 * 1024;

    /// Asserts that `result` failed with a message containing `needle`.
    fn assert_rejected(result: crate::error::Result<CompactOptions>, needle: &str) {
        match result {
            Ok(options) => panic!(
                "expected an error containing {:?}, but got {:?}",
                needle, options
            ),
            Err(err) => {
                let message = err.to_string();
                assert!(
                    message.contains(needle),
                    "error message {:?} does not contain {:?}",
                    message,
                    needle
                );
            }
        }
    }

    #[test]
    fn test_default_options() {
        let defaults = CompactOptions::default();
        assert_eq!(defaults.target_file_size(), 256 * MIB);
        assert_eq!(defaults.max_input_file_size(), 128 * MIB);
        assert_eq!(defaults.min_files_per_group(), 3);
        assert_eq!(defaults.max_compaction_group_bytes(), 512 * MIB);
        assert!(defaults.partition_filter().is_none());
        assert!(!defaults.dry_run());
        assert!(!defaults.allow_partial_failure());
    }

    #[test]
    fn test_with_max_compaction_group_bytes_valid() {
        let options = CompactOptions::new()
            .with_max_compaction_group_bytes(1024 * MIB)
            .unwrap();
        assert_eq!(options.max_compaction_group_bytes(), 1024 * MIB);
    }

    #[test]
    fn test_with_max_compaction_group_bytes_equal_to_target() {
        // Boundary: a budget exactly equal to the (default) target is accepted.
        let options = CompactOptions::new()
            .with_max_compaction_group_bytes(256 * MIB)
            .unwrap();
        assert_eq!(options.max_compaction_group_bytes(), 256 * MIB);
    }

    #[test]
    fn test_with_max_compaction_group_bytes_less_than_target() {
        assert_rejected(
            CompactOptions::new().with_max_compaction_group_bytes(128 * MIB),
            "must be at least target_file_size",
        );
    }

    #[test]
    fn test_with_target_file_size_zero() {
        assert_rejected(
            CompactOptions::new().with_target_file_size(0),
            "target_file_size must be greater than 0",
        );
    }

    #[test]
    fn test_with_target_file_size_below_minimum() {
        assert_rejected(
            CompactOptions::new().with_target_file_size(512),
            "at least 1024 bytes",
        );
    }

    #[test]
    fn test_with_target_file_size_less_than_max_input() {
        assert_rejected(
            CompactOptions::new().with_target_file_size(64 * MIB),
            "must be greater than max_input_file_size",
        );
    }

    #[test]
    fn test_with_max_input_file_size_zero() {
        assert_rejected(
            CompactOptions::new().with_max_input_file_size(0),
            "max_input_file_size must be greater than 0",
        );
    }

    #[test]
    fn test_with_max_input_file_size_greater_than_target() {
        assert_rejected(
            CompactOptions::new().with_max_input_file_size(512 * MIB),
            "must be less than target_file_size",
        );
    }

    #[test]
    fn test_with_min_files_per_group_zero() {
        assert_rejected(
            CompactOptions::new().with_min_files_per_group(0),
            "must be at least 2",
        );
    }

    #[test]
    fn test_with_min_files_per_group_one() {
        assert_rejected(
            CompactOptions::new().with_min_files_per_group(1),
            "cannot compact fewer than 2 files",
        );
    }

    #[test]
    fn test_valid_configuration() {
        let options = CompactOptions::new()
            .with_target_file_size(512 * MIB)
            .unwrap()
            .with_max_input_file_size(256 * MIB)
            .unwrap()
            .with_min_files_per_group(5)
            .unwrap()
            .with_dry_run(true)
            .with_allow_partial_failure(true)
            .with_partition_filter("year=2025".to_string());

        assert_eq!(options.target_file_size(), 512 * MIB);
        assert_eq!(options.max_input_file_size(), 256 * MIB);
        assert_eq!(options.min_files_per_group(), 5);
        // Untouched settings keep their default.
        assert_eq!(options.max_compaction_group_bytes(), 512 * MIB);
        assert_eq!(options.partition_filter(), Some("year=2025"));
        assert!(options.dry_run());
        assert!(options.allow_partial_failure());
    }

    #[test]
    fn test_builder_chain_order_matters() {
        // Shrinking both sizes: the input limit has to go down first.
        let shrink_ok = CompactOptions::new()
            .with_max_input_file_size(64 * MIB)
            .unwrap()
            .with_target_file_size(128 * MIB);
        assert!(shrink_ok.is_ok());

        // The opposite order is rejected because the default input limit is still in place.
        assert_rejected(
            CompactOptions::new().with_target_file_size(64 * MIB),
            "must be greater than max_input_file_size",
        );

        // Growing both sizes: the target has to go up first.
        let grow_ok = CompactOptions::new()
            .with_target_file_size(512 * MIB)
            .unwrap()
            .with_max_input_file_size(256 * MIB);
        assert!(grow_ok.is_ok());

        // The opposite order is rejected because the default target is still in place.
        assert_rejected(
            CompactOptions::new().with_max_input_file_size(256 * MIB),
            "must be less than target_file_size",
        );
    }

    #[test]
    fn test_fields_are_private() {
        // A child module can still reach private fields, so this only checks that every
        // field is readable through its getter.
        let options = CompactOptions::new();
        let _ = options.target_file_size();
        let _ = options.max_input_file_size();
        let _ = options.min_files_per_group();
        let _ = options.max_compaction_group_bytes();
        let _ = options.partition_filter();
        let _ = options.dry_run();
        let _ = options.allow_partial_failure();
    }

    #[test]
    fn test_getter_methods() {
        let options = CompactOptions::new()
            .with_target_file_size(512 * MIB)
            .unwrap()
            .with_max_input_file_size(256 * MIB)
            .unwrap()
            .with_partition_filter("test".to_string());

        assert_eq!(options.target_file_size(), 512 * MIB);
        assert_eq!(options.max_input_file_size(), 256 * MIB);
        assert_eq!(options.max_compaction_group_bytes(), 512 * MIB);
        assert_eq!(options.partition_filter(), Some("test"));
    }
}