use crate::hunk::Hunk;
pub const DEFAULT_MAX_LINES: usize = 500;
#[derive(Debug, Clone)]
pub struct Batch {
pub hunks: Vec<Hunk>,
pub total_lines: usize,
}
impl Batch {
pub fn content(&self) -> String {
self.hunks
.iter()
.map(|h| h.content.as_str())
.collect::<Vec<_>>()
.join("\n\n")
}
pub fn file_list(&self) -> Vec<&str> {
let mut seen = std::collections::HashSet::new();
self.hunks
.iter()
.filter_map(|h| {
if seen.insert(h.file.as_str()) {
Some(h.file.as_str())
} else {
None
}
})
.collect()
}
}
pub fn group_into_batches(hunks: Vec<Hunk>, max_lines: usize) -> Vec<Batch> {
let mut batches = Vec::new();
let mut current_hunks = Vec::new();
let mut current_lines = 0;
for hunk in hunks {
if hunk.lines > max_lines {
if !current_hunks.is_empty() {
batches.push(Batch {
hunks: current_hunks,
total_lines: current_lines,
});
current_hunks = Vec::new();
current_lines = 0;
}
batches.push(Batch {
total_lines: hunk.lines,
hunks: vec![hunk],
});
continue;
}
if current_lines + hunk.lines > max_lines && !current_hunks.is_empty() {
batches.push(Batch {
hunks: current_hunks,
total_lines: current_lines,
});
current_hunks = Vec::new();
current_lines = 0;
}
current_lines += hunk.lines;
current_hunks.push(hunk);
}
if !current_hunks.is_empty() {
batches.push(Batch {
hunks: current_hunks,
total_lines: current_lines,
});
}
batches
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::*;
use crate::hunk::Hunk;
fn make_hunk(file: &str, lines: usize) -> Hunk {
let content = (0..lines)
.map(|i| format!("line {}", i))
.collect::<Vec<_>>()
.join("\n");
Hunk {
file: file.to_string(),
content,
lines,
}
}
#[test]
fn empty_input_returns_empty_output() {
let batches = group_into_batches(vec![], 500);
assert!(batches.is_empty(), "No hunks should produce no batches");
}
#[test]
fn single_small_hunk_returns_one_batch() {
let hunks = vec![make_hunk("a.c", 10)];
let batches = group_into_batches(hunks, 500);
assert_eq!(batches.len(), 1, "One small hunk should produce 1 batch");
assert_eq!(batches[0].hunks.len(), 1);
assert_eq!(batches[0].total_lines, 10);
}
#[test]
fn multiple_small_hunks_fit_one_batch() {
let hunks = vec![
make_hunk("a.c", 100),
make_hunk("b.c", 100),
make_hunk("c.c", 100),
];
let batches = group_into_batches(hunks, 500);
assert_eq!(
batches.len(),
1,
"Three hunks totaling 300 lines should fit in 1 batch (limit 500)"
);
assert_eq!(batches[0].hunks.len(), 3);
assert_eq!(batches[0].total_lines, 300);
}
#[test]
fn hunks_exceeding_limit_split_into_multiple_batches() {
let hunks = vec![make_hunk("a.c", 300), make_hunk("b.c", 300)];
let batches = group_into_batches(hunks, 500);
assert_eq!(
batches.len(),
2,
"Two hunks of 300 lines should split into 2 batches (limit 500)"
);
assert_eq!(batches[0].total_lines, 300);
assert_eq!(batches[1].total_lines, 300);
}
#[test]
fn oversized_hunk_goes_solo() {
let hunks = vec![
make_hunk("small.c", 50),
make_hunk("huge.c", 600),
make_hunk("tiny.c", 30),
];
let batches = group_into_batches(hunks, 500);
assert_eq!(
batches.len(),
3,
"Oversized hunk should be isolated in its own batch"
);
assert_eq!(batches[0].total_lines, 50, "First batch: small hunk");
assert_eq!(
batches[1].total_lines, 600,
"Second batch: oversized hunk alone"
);
assert_eq!(batches[2].total_lines, 30, "Third batch: tiny hunk");
}
#[test]
fn exact_limit_fits_in_one_batch() {
let hunks = vec![make_hunk("a.c", 250), make_hunk("b.c", 250)];
let batches = group_into_batches(hunks, 500);
assert_eq!(
batches.len(),
1,
"Two hunks totaling exactly 500 should fit in 1 batch"
);
assert_eq!(batches[0].total_lines, 500);
}
#[test]
fn one_line_over_limit_forces_new_batch() {
let hunks = vec![make_hunk("a.c", 250), make_hunk("b.c", 251)];
let batches = group_into_batches(hunks, 500);
assert_eq!(
batches.len(),
2,
"Two hunks totaling 501 should split into 2 batches"
);
}
#[test]
fn greedy_packing_fills_batches() {
let hunks: Vec<_> = (0..5)
.map(|i| make_hunk(&format!("file{}.c", i), 200))
.collect();
let batches = group_into_batches(hunks, 500);
assert_eq!(batches.len(), 3, "5x200 with limit 500 → 3 batches (2+2+1)");
assert_eq!(batches[0].total_lines, 400);
assert_eq!(batches[1].total_lines, 400);
assert_eq!(batches[2].total_lines, 200);
}
#[test]
fn batch_content_joins_hunks_with_blank_lines() {
let batch = Batch {
hunks: vec![make_hunk("a.c", 2), make_hunk("b.c", 2)],
total_lines: 4,
};
let content = batch.content();
assert!(
content.contains("\n\n"),
"Batch content should separate hunks with blank lines"
);
}
#[test]
fn file_list_returns_unique_files() {
let batch = Batch {
hunks: vec![
make_hunk("a.c", 10),
make_hunk("a.c", 20),
make_hunk("b.c", 15),
],
total_lines: 45,
};
let files = batch.file_list();
let unique: HashSet<&str> = files.iter().copied().collect();
assert_eq!(
unique.len(),
2,
"file_list should return deduplicated file paths"
);
assert!(unique.contains("a.c"));
assert!(unique.contains("b.c"));
}
#[test]
fn file_list_empty_batch_returns_empty() {
let batch = Batch {
hunks: vec![],
total_lines: 0,
};
let files = batch.file_list();
assert!(files.is_empty(), "Empty batch should have empty file list");
}
}