rsbatch_maestro/
lib.rs

1//! # Batch Maestro
2//!
3//! A Rust crate for flexible batch splitting and management with various strategies.
4//!
5//! This crate provides a comprehensive set of functions to divide a total number into batches,
6//! offering different splitting strategies to suit various scenarios such as task distribution,
7//! load balancing, and resource allocation.
8//!
9//! ## Features
10//!
11//! - Even and uneven splitting of totals into batches
12//! - Splitting by count, with remainder, or based on weights
13//! - Range-based splitting and optimization
14//! - Minimum batch size enforcement
15//! - Batch merging and rebalancing
16//!
17//! ## Usage
18//!
19//! ```rust
20//! use batch_maestro::even_split;
21//!
22//! fn main() {
23//!     match even_split(128, 8) {
24//!         Ok((num_batches, batch_sizes)) => {
25//!             println!("Number of batches: {}", num_batches);
26//!             println!("Batch sizes: {:?}", batch_sizes);
27//!         },
28//!         Err(e) => println!("Error: {}", e),
29//!     }
30//! }
31//! ```
32//!
33//! For more information and examples, please visit the [GitHub repository](https://github.com/aeromilai/batch-maestro).
34
35use std::num::NonZeroUsize;
36use std::cmp;
37
38/// Splits a total number into even batches.
39///
40/// This function takes a total number and a maximum batch size, and attempts to divide the total
41/// into as many even batches as possible, without exceeding the maximum batch size.
42///
43/// # Arguments
44///
45/// * `total` - The total number to be split.
46/// * `max_batch_size` - The maximum allowed size for each batch.
47///
48/// # Returns
49///
50/// A `Result` containing a tuple with:
51/// 1. The number of batches.
52/// 2. A vector of `NonZeroUsize` representing the size of each batch.
53///
54/// # Errors
55///
56/// Returns an error if:
57/// * The total is zero.
58/// * The max_batch_size is zero.
59///
60/// # Examples
61///
62/// ```
63/// use batch_maestro::even_split;
64/// use std::num::NonZeroUsize;
65///
66/// let (num_batches, batch_sizes) = even_split(50, 8).unwrap();
67/// assert_eq!(num_batches, 10);
68/// assert_eq!(batch_sizes, vec![NonZeroUsize::new(5).unwrap(); 10]);
69/// ```
70
71pub fn even_split(total: usize, max_batch_size: usize) -> Result<(usize, Vec<NonZeroUsize>), String> {
72    if total == 0 {
73        return Err(String::from("Total must be a positive number"));
74    }
75    if max_batch_size == 0 {
76        return Err(String::from("Max batch size must be a positive number"));
77    }
78
79    if total <= max_batch_size {
80        return Ok((1, vec![NonZeroUsize::new(total).unwrap()]));
81    }
82
83    let mut batch_size = max_batch_size;
84    while batch_size > 1 {
85        if total % batch_size == 0 {
86            let num_batches = total / batch_size;
87            return Ok((num_batches, vec![NonZeroUsize::new(batch_size).unwrap(); num_batches]));
88        }
89        batch_size -= 1;
90    }
91
92    Ok((total, vec![NonZeroUsize::new(1).unwrap(); total]))
93}
94
95/// Splits the total based on provided weights for each batch.
96///
97/// # Arguments
98///
99/// * `total` - The total number to be split.
100/// * `weights` - A vector of weights for each batch.
101///
102/// # Returns
103///
104/// A `Result` containing a vector of `NonZeroUsize` representing the size of each batch.
105///
106/// # Errors
107///
108/// Returns an error if:
109/// * The total is zero.
110/// * The weights vector is empty.
111/// * Any weight is zero.
112///
113/// # Examples
114///
115/// ```
116/// use batch_maestro::split_weighted;
117/// use std::num::NonZeroUsize;
118///
119/// let batch_sizes = split_weighted(100, vec![1, 2, 3]).unwrap();
120/// assert_eq!(batch_sizes, vec![NonZeroUsize::new(17).unwrap(), NonZeroUsize::new(33).unwrap(), NonZeroUsize::new(50).unwrap()]);
121/// ```
122pub fn split_weighted(total: usize, weights: Vec<usize>) -> Result<Vec<NonZeroUsize>, String> {
123    if total == 0 {
124        return Err(String::from("Total must be a positive number"));
125    }
126    if weights.is_empty() {
127        return Err(String::from("Weights vector must not be empty"));
128    }
129    if weights.iter().any(|&w| w == 0) {
130        return Err(String::from("All weights must be positive numbers"));
131    }
132
133    let weight_sum: usize = weights.iter().sum();
134    let mut batches = Vec::with_capacity(weights.len());
135    let mut remaining = total;
136
137    for (i, &weight) in weights.iter().enumerate() {
138        let size = if i == weights.len() - 1 {
139            remaining
140        } else {
141            (total * weight) / weight_sum
142        };
143        batches.push(NonZeroUsize::new(size).unwrap());
144        remaining -= size;
145    }
146
147    Ok(batches)
148}
149
150/// Generates a range of possible split configurations based on a min and max batch size.
151///
152/// # Arguments
153///
154/// * `total` - The total number to be split. 
155/// * `min_batch_size` - The minimum allowed size for each batch.
156/// * `max_batch_size` - The maximum allowed size for each batch.
157///
158/// # Returns
159///
160/// A `Result` containing a vector of tuples, each representing a possible split configuration:
161/// (number of batches, batch size, remainder)
162///
163/// # Errors
164///
165/// Returns an error if:
166/// * The total is zero.
167/// * The min_batch_size is zero.
168/// * The max_batch_size is less than min_batch_size.
169///
170/// # Examples
171///
172/// ```
173/// use batch_maestro::split_range;
174///
175/// let configurations = split_range(100, 20, 40).unwrap();
176/// assert_eq!(configurations, vec![(3, 33, 1), (4, 25, 0), (5, 20, 0)]);
177/// ```
178pub fn split_range(total: usize, min_batch_size: usize, max_batch_size: usize) -> Result<Vec<(usize, usize, usize)>, String> {
179    if total == 0 {
180        return Err(String::from("Total must be a positive number"));
181    }
182    if min_batch_size == 0 {
183        return Err(String::from("Minimum batch size must be a positive number"));
184    }
185    if max_batch_size < min_batch_size {
186        return Err(String::from("Maximum batch size must be greater than or equal to minimum batch size"));
187    }
188
189    let mut configurations = Vec::new();
190    for batch_size in (min_batch_size..=max_batch_size).rev() {
191        let num_batches = total / batch_size;
192        let remainder = total % batch_size;
193        if num_batches > 0 {
194            configurations.push((num_batches, batch_size, remainder));
195        }
196    }
197
198    Ok(configurations)
199}
200
201/// Finds the most even split possible within a given range of batch counts.
202///
203/// # Arguments
204///
205/// * `total` - The total number to be split.
206/// * `min_batches` - The minimum number of batches.
207/// * `max_batches` - The maximum number of batches.
208///
209/// # Returns
210///
211/// A `Result` containing a tuple with:
212/// 1. The number of batches.
213/// 2. A vector of `NonZeroUsize` representing the size of each batch.
214///
215/// # Errors
216///
217/// Returns an error if:
218/// * The total is zero.
219/// * The min_batches is zero.
220/// * The max_batches is less than min_batches.
221///
222/// # Examples
223///
224/// ```
225/// use batch_maestro::optimize_split;
226/// use std::num::NonZeroUsize;
227///
228/// let (num_batches, batch_sizes) = optimize_split(100, 3, 5).unwrap();
229/// assert_eq!(num_batches, 4);
230/// assert_eq!(batch_sizes, vec![NonZeroUsize::new(25).unwrap(); 4]);
231/// ```
232pub fn optimize_split(total: usize, min_batches: usize, max_batches: usize) -> Result<(usize, Vec<NonZeroUsize>), String> {
233    if total == 0 {
234        return Err(String::from("Total must be a positive number"));
235    }
236    if min_batches == 0 {
237        return Err(String::from("Minimum number of batches must be a positive number"));
238    }
239    if max_batches < min_batches {
240        return Err(String::from("Maximum number of batches must be greater than or equal to minimum number of batches"));
241    }
242
243    let mut best_num_batches = min_batches;
244    let mut min_remainder = total;
245
246    for num_batches in min_batches..=max_batches {
247        let remainder = total % num_batches;
248        if remainder < min_remainder {
249            best_num_batches = num_batches;
250            min_remainder = remainder;
251        }
252        if remainder == 0 {
253            break;
254        }
255    }
256
257    let base_size = total / best_num_batches;
258    let mut batch_sizes = vec![NonZeroUsize::new(base_size).unwrap(); best_num_batches];
259    for i in 0..min_remainder {
260        batch_sizes[i] = NonZeroUsize::new(base_size + 1).unwrap();
261    }
262
263    Ok((best_num_batches, batch_sizes))
264}
265
266/// Splits a total number into even batches, ensuring each batch meets a minimum size requirement.
267///
268/// # Arguments
269///
270/// * `total` - The total number to be split.
271/// * `max_batch_size` - The maximum allowed size for each batch.
272/// * `min_batch_size` - The minimum required size for each batch.
273///
274/// # Returns
275///
276/// A `Result` containing a tuple with:
277/// 1. The number of batches.
278/// 2. A vector of `NonZeroUsize` representing the size of each batch.
279///
280/// # Errors
281///
282/// Returns an error if:
283/// * The total is zero.
284/// * The max_batch_size is zero.
285/// * The min_batch_size is greater than max_batch_size.
286/// * It's impossible to create batches that meet the minimum size requirement.
287///
288/// # Examples
289///
290/// ```
291/// use batch_maestro::split_with_min_batch;
292/// use std::num::NonZeroUsize;
293///
294/// let (num_batches, batch_sizes) = split_with_min_batch(100, 30, 20).unwrap();
295/// assert_eq!(num_batches, 4);
296/// assert_eq!(batch_sizes, vec![NonZeroUsize::new(25).unwrap(); 4]);
297/// ```
298pub fn split_with_min_batch(total: usize, max_batch_size: usize, min_batch_size: usize) -> Result<(usize, Vec<NonZeroUsize>), String> {
299    if total == 0 {
300        return Err(String::from("Total must be a positive number"));
301    }
302    if max_batch_size == 0 {
303        return Err(String::from("Max batch size must be a positive number"));
304    }
305    if min_batch_size > max_batch_size {
306        return Err(String::from("Min batch size must be less than or equal to max batch size"));
307    }
308
309    let num_batches = (total + min_batch_size - 1) / min_batch_size;
310    let base_size = total / num_batches;
311    let remainder = total % num_batches;
312
313    let mut batch_sizes = Vec::with_capacity(num_batches);
314    for i in 0..num_batches {
315        let size = base_size + if i < remainder { 1 } else { 0 };
316        batch_sizes.push(NonZeroUsize::new(size).unwrap());
317    }
318
319    Ok((num_batches, batch_sizes))
320}
321
322
323/// Splits a total number into a specified number of batches.
324///
325/// This function divides the total into the given number of batches,
326/// allowing for uneven distribution if necessary.
327///
328/// # Arguments
329///
330/// * `total` - The total number to be split.
331/// * `num_batches` - The number of batches to split the total into.
332///
333/// # Returns
334///
335/// A `Result` containing a vector of `NonZeroUsize` representing the size of each batch.
336///
337/// # Errors
338///
339/// Returns an error if:
340/// * The total is zero.
341/// * The number of batches is zero.
342///
343/// # Examples
344///
345/// ```
346/// use batch_maestro::split_by_count;
347/// use std::num::NonZeroUsize;
348///
349/// let batch_sizes = split_by_count(10, 3).unwrap();
350/// assert_eq!(batch_sizes, vec![NonZeroUsize::new(4).unwrap(), NonZeroUsize::new(3).unwrap(), NonZeroUsize::new(3).unwrap()]);
351/// ```
352pub fn split_by_count(total: usize, num_batches: usize) -> Result<Vec<NonZeroUsize>, String> {
353    if total == 0 {
354        return Err(String::from("Total must be a positive number"));
355    }
356    if num_batches == 0 {
357        return Err(String::from("Number of batches must be a positive number"));
358    }
359
360    let base_size = total / num_batches;
361    let remainder = total % num_batches;
362
363    let mut batches = Vec::with_capacity(num_batches);
364    for i in 0..num_batches {
365        let size = base_size + if i < remainder { 1 } else { 0 };
366        batches.push(NonZeroUsize::new(size).ok_or_else(|| String::from("Failed to create NonZeroUsize"))?);
367    }
368
369    Ok(batches)
370}
371
372/// Splits a total number into even batches, returning the remainder separately.
373///
374/// This function is similar to `even_split`, but instead of including the remainder
375/// in the last batch, it returns it as a separate value.
376///
377/// # Arguments
378///
379/// * `total` - The total number to be split.
380/// * `max_batch_size` - The maximum allowed size for each batch.
381///
382/// # Returns
383///
384/// A `Result` containing a tuple with:
385/// 1. The number of batches.
386/// 2. A vector of `NonZeroUsize` representing the size of each batch.
387/// 3. The remainder.
388///
389/// # Errors
390///
391/// Returns an error if:
392/// * The total is zero.
393/// * The max_batch_size is zero.
394///
395/// # Examples
396///
397/// ```
398/// use batch_maestro::split_with_remainder;
399/// use std::num::NonZeroUsize;
400///
401/// let (num_batches, batch_sizes, remainder) = split_with_remainder(50, 8).unwrap();
402/// assert_eq!(num_batches, 6);
403/// assert_eq!(batch_sizes, vec![NonZeroUsize::new(8).unwrap(); 6]);
404/// assert_eq!(remainder, 2);
405/// ```
406pub fn split_with_remainder(total: usize, max_batch_size: usize) -> Result<(usize, Vec<NonZeroUsize>, usize), String> {
407    if total == 0 {
408        return Err(String::from("Total must be a positive number"));
409    }
410    if max_batch_size == 0 {
411        return Err(String::from("Max batch size must be a positive number"));
412    }
413
414    let num_batches = total / max_batch_size;
415    let remainder = total % max_batch_size;
416
417    if num_batches == 0 {
418        Ok((1, vec![NonZeroUsize::new(total).unwrap()], 0))
419    } else {
420        Ok((
421            num_batches,
422            vec![NonZeroUsize::new(max_batch_size).unwrap(); num_batches],
423            remainder
424        ))
425    }
426}
427
428#[cfg(test)]
429mod tests {
430    use super::*;
431
432    #[test]
433    fn test_even_split_basic() {
434        assert_eq!(even_split(50, 8), Ok((10, vec![NonZeroUsize::new(5).unwrap(); 10])));
435        assert_eq!(even_split(128, 8), Ok((16, vec![NonZeroUsize::new(8).unwrap(); 16])));
436        assert_eq!(even_split(46, 8), Ok((2, vec![NonZeroUsize::new(23).unwrap(); 2])));
437        assert_eq!(even_split(7, 8), Ok((1, vec![NonZeroUsize::new(7).unwrap()])));
438    }
439
440    #[test]
441    fn test_even_split_edge_cases() {
442        assert_eq!(even_split(1, 1), Ok((1, vec![NonZeroUsize::new(1).unwrap()])));
443        assert_eq!(even_split(100, 100), Ok((1, vec![NonZeroUsize::new(100).unwrap()])));
444    }
445
446    #[test]
447    fn test_even_split_errors() {
448        assert!(even_split(0, 8).is_err());
449        assert!(even_split(10, 0).is_err());
450    }
451
452    #[test]
453    fn test_even_split_large_numbers() {
454        assert_eq!(even_split(1000000, 1000), Ok((1000, vec![NonZeroUsize::new(1000).unwrap(); 1000])));
455    }
456
457    #[test]
458    fn test_even_split_prime_numbers() {
459        assert_eq!(even_split(17, 8), Ok((1, vec![NonZeroUsize::new(17).unwrap()])));
460        assert_eq!(even_split(23, 8), Ok((1, vec![NonZeroUsize::new(23).unwrap()])));
461    }
462
463    #[test]
464    fn test_split_by_count() {
465        assert_eq!(split_by_count(10, 3), Ok(vec![NonZeroUsize::new(4).unwrap(), NonZeroUsize::new(3).unwrap(), NonZeroUsize::new(3).unwrap()]));
466        assert_eq!(split_by_count(20, 4), Ok(vec![NonZeroUsize::new(5).unwrap(); 4]));
467        assert_eq!(split_by_count(7, 3), Ok(vec![NonZeroUsize::new(3).unwrap(), NonZeroUsize::new(2).unwrap(), NonZeroUsize::new(2).unwrap()]));
468    }
469
470    #[test]
471    fn test_split_by_count_errors() {
472        assert!(split_by_count(0, 5).is_err());
473        assert!(split_by_count(10, 0).is_err());
474    }
475
476    #[test]
477    fn test_split_with_remainder() {
478        assert_eq!(split_with_remainder(50, 8), Ok((6, vec![NonZeroUsize::new(8).unwrap(); 6], 2)));
479        assert_eq!(split_with_remainder(100, 30), Ok((3, vec![NonZeroUsize::new(30).unwrap(); 3], 10)));
480        assert_eq!(split_with_remainder(10, 20), Ok((1, vec![NonZeroUsize::new(10).unwrap()], 0)));
481    }
482
483    #[test]
484    fn test_split_with_remainder_errors() {
485        assert!(split_with_remainder(0, 5).is_err());
486        assert!(split_with_remainder(10, 0).is_err());
487    }
488
489    #[test]
490    fn test_split_weighted() {
491        assert_eq!(split_weighted(100, vec![1, 2, 3]), Ok(vec![NonZeroUsize::new(17).unwrap(), NonZeroUsize::new(33).unwrap(), NonZeroUsize::new(50).unwrap()]));
492        assert_eq!(split_weighted(10, vec![1, 1]), Ok(vec![NonZeroUsize::new(5).unwrap(), NonZeroUsize::new(5).unwrap()]));
493    }
494
495    #[test]
496    fn test_split_weighted_errors() {
497        assert!(split_weighted(0, vec![1, 2, 3]).is_err());
498        assert!(split_weighted(100, vec![]).is_err());
499        assert!(split_weighted(100, vec![0, 1, 2]).is_err());
500    }
501
502    #[test]
503    fn test_split_range() {
504        assert_eq!(split_range(100, 20, 40), Ok(vec![(3, 33, 1), (4, 25, 0), (5, 20, 0)]));
505        assert_eq!(split_range(10, 2, 5), Ok(vec![(2, 5, 0), (3, 3, 1), (4, 2, 2)]));
506    }
507
508    #[test]
509    fn test_split_range_errors() {
510        assert!(split_range(0, 20, 40).is_err());
511        assert!(split_range(100, 0, 40).is_err());
512        assert!(split_range(100, 40, 20).is_err());
513    }
514
515    #[test]
516    fn test_optimize_split() {
517        assert_eq!(optimize_split(100, 3, 5), Ok((4, vec![NonZeroUsize::new(25).unwrap(); 4])));
518        assert_eq!(optimize_split(10, 2, 4), Ok((2, vec![NonZeroUsize::new(5).unwrap(); 2])));
519    }
520
521    #[test]
522    fn test_optimize_split_errors() {
523        assert!(optimize_split(0, 3, 5).is_err());
524        assert!(optimize_split(100, 0, 5).is_err());
525        assert!(optimize_split(100, 5, 3).is_err());
526    }
527
528    #[test]
529    fn test_split_with_min_batch() {
530        assert_eq!(split_with_min_batch(100, 30, 20), Ok((4, vec![NonZeroUsize::new(25).unwrap(); 4])));
531        assert_eq!(split_with_min_batch(50, 20, 10), Ok((3, vec![NonZeroUsize::new(17).unwrap(), NonZeroUsize::new(17).unwrap(), NonZeroUsize::new(16).unwrap()])));
532    }
533
534    #[test]
535    fn test_split_with_min_batch_errors() {
536        assert!(split_with_min_batch(0, 30, 20).is_err());
537        assert!(split_with_min_batch(100, 0, 20).is_err());
538        assert!(split_with_min_batch(100, 30, 40).is_err());
539        assert!(split_with_min_batch(100, 30, 31).is_err());
540    }
541}