pub mod llm;
use crate::diff::DiffData;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
/// Top-level payload of a grouping response: a list of semantic groups.
///
/// Only `Deserialize` is derived, so values of this type are read from
/// serialized input rather than written out.
// NOTE(review): presumably this is the shape returned by the `llm` submodule's
// backend — confirm against the caller.
#[derive(Debug, Clone, Deserialize)]
pub struct GroupingResponse {
/// The groups contained in the response, in the order they were received.
pub groups: Vec<SemanticGroup>,
}
/// A labelled set of diff changes that belong together.
///
/// The membership of a group can arrive in two shapes: a detailed `changes`
/// list (file plus hunk indices) or a plain `files` list. Both default to
/// empty when absent from the input. Read membership through
/// [`SemanticGroup::changes`], which prefers `changes` and falls back to
/// `files`.
#[derive(Debug, Clone, Deserialize)]
pub struct SemanticGroup {
/// Name of the group; `merge_groups` matches groups on this value
/// case-insensitively.
pub label: String,
/// Free-form description; defaults to an empty string when missing.
// NOTE(review): dead_code is silenced here, presumably because nothing reads
// the description yet — confirm before removing the attribute.
#[serde(default)]
#[allow(dead_code)]
pub description: String,
/// Detailed membership: one entry per file, optionally with hunk indices.
#[serde(default)]
changes: Vec<GroupedChange>,
/// File-only membership, consulted only while `changes` is empty.
#[serde(default)]
files: Vec<String>,
}
impl SemanticGroup {
    /// Creates a group from an explicit list of changes.
    ///
    /// The file-only fallback list starts out empty.
    pub fn new(label: String, description: String, changes: Vec<GroupedChange>) -> Self {
        let files = Vec::new();
        Self {
            label,
            description,
            changes,
            files,
        }
    }

    /// Replaces the group's membership with `changes`.
    ///
    /// The file-only fallback list is emptied as well, so a later call to
    /// [`SemanticGroup::changes`] reflects exactly what was stored here.
    pub fn set_changes(&mut self, changes: Vec<GroupedChange>) {
        self.files.clear();
        self.changes = changes;
    }

    /// Returns the group's membership as an owned list.
    ///
    /// When detailed changes are present they are returned as a copy.
    /// Otherwise every entry of the file-only list is turned into a
    /// [`GroupedChange`] with no hunk indices.
    pub fn changes(&self) -> Vec<GroupedChange> {
        if self.changes.is_empty() {
            let mut derived = Vec::new();
            for path in &self.files {
                derived.push(GroupedChange {
                    file: path.clone(),
                    hunks: Vec::new(),
                });
            }
            derived
        } else {
            self.changes.clone()
        }
    }
}
/// One file's membership in a [`SemanticGroup`].
#[derive(Debug, Clone, Deserialize)]
pub struct GroupedChange {
/// Path of the file; compared against diff paths with the leading `b/`
/// trimmed.
pub file: String,
/// Zero-based indices into the file's hunk list; defaults to empty when
/// missing from the input.
// NOTE(review): an empty list appears to mean "the whole file" —
// `normalize_hunk_indices` expands it to every hunk for multi-hunk files.
// Confirm with consumers of this type.
#[serde(default)]
pub hunks: Vec<usize>,
}
/// Status of a grouping operation.
// NOTE(review): not referenced in the visible part of this file; presumably
// tracked by the caller that drives grouping requests — confirm.
#[derive(Debug, Clone, PartialEq)]
pub enum GroupingStatus {
/// Presumably: no grouping has been requested.
Idle,
/// Presumably: a grouping request is in flight.
Loading,
/// Presumably: grouping finished successfully.
Done,
/// Grouping failed; carries a message describing the failure.
Error(String),
}
/// Soft size budget for generated summaries.
///
/// Despite the name, the value is compared against `String::len`, so it is
/// measured in bytes rather than characters. It is a soft limit: the summary
/// builders stop adding detail once it is reached but may overshoot it.
const MAX_SUMMARY_CHARS: usize = 8000;
/// Builds a plain-text summary of every file in `diff_data`.
///
/// Each file contributes a `FILE:` header line followed by either a sampled
/// view of an untracked file's added lines or per-hunk samples. Once the
/// accumulated text reaches [`MAX_SUMMARY_CHARS`] bytes, an omission marker is
/// appended and the remaining files are skipped.
pub fn hunk_summaries(diff_data: &DiffData) -> String {
    let mut summary = String::new();

    for file in diff_data.files.iter() {
        let header = format!(
            "FILE: {} ({}, +{} -{})\n",
            file.target_file.trim_start_matches("b/"),
            file_status(file),
            file.added_count,
            file.removed_count
        );
        summary.push_str(&header);

        // Untracked files get the richer sample only while there is budget
        // left; everything else goes through the per-hunk sampler.
        let has_budget = summary.len() < MAX_SUMMARY_CHARS;
        if file.is_untracked && has_budget {
            let sample = summarize_untracked_file(file);
            summary.push_str(&sample);
        } else {
            append_hunk_samples(&mut summary, file);
        }

        if summary.len() >= MAX_SUMMARY_CHARS {
            summary.push_str("... (remaining files omitted for brevity)\n");
            break;
        }
    }

    summary
}
/// Returns a short human-readable status label for a diff file.
///
/// Precedence: untracked, then rename (including the source path with its
/// leading `a/` trimmed), then a label derived from the added/removed line
/// counts.
fn file_status(f: &crate::diff::DiffFile) -> String {
    if f.is_untracked {
        return "untracked/new".to_string();
    }
    if f.is_rename {
        return format!("renamed from {}", f.source_file.trim_start_matches("a/"));
    }

    // Only the line counts are consulted from here on.
    let label = if f.added_count > 0 && f.removed_count == 0 {
        "added"
    } else if f.removed_count > 0 && f.added_count == 0 {
        "deleted"
    } else {
        "modified"
    };
    label.to_string()
}
/// Appends a header line for every hunk of `f` to `out`, plus a short sample
/// of changed lines while the summary is under budget.
///
/// For each hunk at most four added/removed lines are shown, each truncated
/// to 60 bytes; an ellipsis line is written if the hunk has further lines
/// after the fourth sample. Context lines are never shown.
fn append_hunk_samples(out: &mut String, f: &crate::diff::DiffFile) {
    for (index, hunk) in f.hunks.iter().enumerate() {
        out.push_str(&format!(" HUNK {}: {}\n", index, hunk.header));

        // Over budget: keep the header but skip the line samples.
        if out.len() >= MAX_SUMMARY_CHARS {
            continue;
        }

        let mut shown = 0;
        for line in &hunk.lines {
            if shown >= 4 {
                out.push_str(" ...\n");
                break;
            }
            let rendered = match line.line_type {
                crate::diff::LineType::Added => {
                    format!(" + {}\n", truncate(&line.content, 60))
                }
                crate::diff::LineType::Removed => {
                    format!(" - {}\n", truncate(&line.content, 60))
                }
                _ => continue,
            };
            out.push_str(&rendered);
            shown += 1;
        }
    }
}
/// Produces a sampled view of the added lines of an untracked file.
///
/// Files with at most 12 added lines are listed in full. Larger files are
/// represented by three labelled samples of four lines each, taken from the
/// head, the middle and the tail. Every line is truncated to 80 bytes.
fn summarize_untracked_file(f: &crate::diff::DiffFile) -> String {
    const SAMPLE: usize = 4;

    /// Formats one sampled line.
    fn sample_line(text: &str) -> String {
        format!(" + {}\n", truncate(text, 80))
    }

    let mut added: Vec<&str> = Vec::new();
    for hunk in f.hunks.iter() {
        for line in hunk.lines.iter() {
            if line.line_type == crate::diff::LineType::Added {
                added.push(line.content.as_str());
            }
        }
    }

    let total = added.len();
    let mut out = String::new();

    if total <= 12 {
        for text in added.iter().copied() {
            out.push_str(&sample_line(text));
        }
        return out;
    }

    out.push_str(" [head]\n");
    for text in added.iter().copied().take(SAMPLE) {
        out.push_str(&sample_line(text));
    }

    // `total` is at least 13 here, so this subtraction cannot underflow.
    let mid_start = total / 2 - SAMPLE / 2;
    out.push_str(&format!(" [mid ~line {}]\n", mid_start + 1));
    for text in added.iter().copied().skip(mid_start).take(SAMPLE) {
        out.push_str(&sample_line(text));
    }

    let tail_start = total.saturating_sub(SAMPLE);
    out.push_str(&format!(" [tail ~line {}]\n", tail_start + 1));
    for text in added.iter().copied().skip(tail_start) {
        out.push_str(&sample_line(text));
    }

    out
}
/// Hashes the content of a diff file: every hunk header and, for each line,
/// its kind followed by its text.
///
/// File-level metadata (paths, counts, flags) is not part of the hash.
// NOTE(review): `DefaultHasher`'s algorithm is unspecified by std, so these
// values should presumably only be compared within one build of the program.
pub fn compute_file_hash(file: &crate::diff::DiffFile) -> u64 {
    let mut state = DefaultHasher::new();

    for hunk in file.hunks.iter() {
        hunk.header.hash(&mut state);

        for line in hunk.lines.iter() {
            // A fixed one-byte tag keeps an added line distinct from a
            // removed or context line with the same text.
            let kind_tag: u8 = match line.line_type {
                crate::diff::LineType::Added => 0,
                crate::diff::LineType::Removed => 1,
                crate::diff::LineType::Context => 2,
            };
            kind_tag.hash(&mut state);
            line.content.hash(&mut state);
        }
    }

    state.finish()
}
/// Maps every file in `diff_data` to its content hash.
///
/// Keys are target paths with the leading `b/` trimmed; values come from
/// [`compute_file_hash`]. If two files share a path, the later one wins.
pub fn compute_all_file_hashes(diff_data: &DiffData) -> HashMap<String, u64> {
    let mut hashes: HashMap<String, u64> = HashMap::new();
    for file in &diff_data.files {
        let key = file.target_file.trim_start_matches("b/");
        hashes.insert(key.to_string(), compute_file_hash(file));
    }
    hashes
}
/// Difference between two per-file hash snapshots, as produced by
/// `compute_diff_delta`. Every path appears in exactly one list.
#[derive(Debug, Clone, Serialize)]
pub struct DiffDelta {
/// Paths present in the new snapshot but not in the previous one.
pub new_files: Vec<String>,
/// Paths present in the previous snapshot but not in the new one.
pub removed_files: Vec<String>,
/// Paths present in both snapshots whose hash differs.
pub modified_files: Vec<String>,
/// Paths present in both snapshots with an identical hash.
pub unchanged_files: Vec<String>,
}
impl DiffDelta {
    /// Returns `true` when at least one file was added, removed or modified.
    pub fn has_changes(&self) -> bool {
        let nothing_changed = self.new_files.is_empty()
            && self.removed_files.is_empty()
            && self.modified_files.is_empty();
        !nothing_changed
    }

    /// Returns `true` when files were removed and nothing was added or
    /// modified.
    pub fn is_only_removals(&self) -> bool {
        if self.removed_files.is_empty() {
            return false;
        }
        self.new_files.is_empty() && self.modified_files.is_empty()
    }
}
/// Classifies every path of two hash snapshots as new, removed, modified or
/// unchanged.
///
/// * `new_hashes` — path to content hash for the current diff.
/// * `previous_hashes` — path to content hash for the diff seen before.
///
/// Each list in the returned [`DiffDelta`] is sorted lexicographically.
/// `HashMap` iteration order is arbitrary, so without sorting the same pair of
/// snapshots could yield differently ordered (and differently serialized)
/// results from one run to the next.
pub fn compute_diff_delta(
    new_hashes: &HashMap<String, u64>,
    previous_hashes: &HashMap<String, u64>,
) -> DiffDelta {
    let mut new_files = Vec::new();
    let mut modified_files = Vec::new();
    let mut unchanged_files = Vec::new();

    for (path, &new_hash) in new_hashes {
        match previous_hashes.get(path) {
            None => new_files.push(path.clone()),
            Some(&prev_hash) if prev_hash != new_hash => modified_files.push(path.clone()),
            _ => unchanged_files.push(path.clone()),
        }
    }

    let mut removed_files: Vec<String> = previous_hashes
        .keys()
        .filter(|p| !new_hashes.contains_key(*p))
        .cloned()
        .collect();

    // Paths are unique map keys, so an unstable sort gives the same result as
    // a stable one.
    new_files.sort_unstable();
    removed_files.sort_unstable();
    modified_files.sort_unstable();
    unchanged_files.sort_unstable();

    DiffDelta {
        new_files,
        removed_files,
        modified_files,
        unchanged_files,
    }
}
/// Builds the summary text for an incremental regrouping.
///
/// The output starts with a numbered listing of `existing_groups` (label and
/// member files) when there are any, followed by summaries of only those
/// files that `delta` reports as new or modified. File summaries follow the
/// same layout and [`MAX_SUMMARY_CHARS`] budget as `hunk_summaries`.
pub fn incremental_hunk_summaries(
    diff_data: &DiffData,
    delta: &DiffDelta,
    existing_groups: &[SemanticGroup],
) -> String {
    let mut prompt = String::new();

    if !existing_groups.is_empty() {
        prompt.push_str(
            "EXISTING GROUPS (for context \u{2014} assign new changes to these or create new groups):\n",
        );
        for (index, group) in existing_groups.iter().enumerate() {
            let members = group.changes();
            let joined = members
                .iter()
                .map(|change| change.file.as_str())
                .collect::<Vec<&str>>()
                .join(", ");
            prompt.push_str(&format!(
                "{}. \"{}\" \u{2014} files: {}\n",
                index + 1,
                group.label,
                joined
            ));
        }
        prompt.push('\n');
    }

    prompt.push_str("NEW/MODIFIED FILES TO GROUP:\n");

    // Paths that need (re)grouping.
    let mut wanted: std::collections::HashSet<&str> = std::collections::HashSet::new();
    for path in delta.new_files.iter().chain(delta.modified_files.iter()) {
        wanted.insert(path.as_str());
    }

    let relevant = diff_data
        .files
        .iter()
        .filter(|file| wanted.contains(file.target_file.trim_start_matches("b/")));

    for file in relevant {
        let header = format!(
            "FILE: {} ({}, +{} -{})\n",
            file.target_file.trim_start_matches("b/"),
            file_status(file),
            file.added_count,
            file.removed_count
        );
        prompt.push_str(&header);

        let has_budget = prompt.len() < MAX_SUMMARY_CHARS;
        if file.is_untracked && has_budget {
            let sample = summarize_untracked_file(file);
            prompt.push_str(&sample);
        } else {
            append_hunk_samples(&mut prompt, file);
        }

        if prompt.len() >= MAX_SUMMARY_CHARS {
            prompt.push_str("... (remaining files omitted for brevity)\n");
            break;
        }
    }

    prompt
}
/// Fills in explicit hunk indices for changes that list none.
///
/// A change with an empty `hunks` list whose file has more than one hunk in
/// `diff_data` is rewritten to reference every hunk (`0..count`). Files with
/// zero or one hunk, and files missing from `diff_data`, are left alone.
/// Every group's membership is stored back through `set_changes`, so any
/// file-only membership is converted into detailed changes as a side effect.
pub fn normalize_hunk_indices(groups: &mut [SemanticGroup], diff_data: &DiffData) {
    // Hunk count per path, keyed by the path with the leading `b/` trimmed.
    let mut hunk_counts: HashMap<&str, usize> = HashMap::new();
    for file in diff_data.files.iter() {
        hunk_counts.insert(file.target_file.trim_start_matches("b/"), file.hunks.len());
    }

    for group in groups.iter_mut() {
        let mut changes = group.changes();
        for change in changes.iter_mut().filter(|c| c.hunks.is_empty()) {
            match hunk_counts.get(change.file.as_str()) {
                Some(&count) if count > 1 => change.hunks = (0..count).collect(),
                _ => {}
            }
        }
        group.set_changes(changes);
    }
}
pub fn remove_files_from_groups(groups: &mut Vec<SemanticGroup>, files_to_remove: &[String]) {
if files_to_remove.is_empty() {
return;
}
let remove_set: std::collections::HashSet<&str> =
files_to_remove.iter().map(|s| s.as_str()).collect();
groups.retain_mut(|group| {
let filtered: Vec<GroupedChange> = group
.changes()
.into_iter()
.filter(|c| !remove_set.contains(c.file.as_str()))
.collect();
group.set_changes(filtered);
!group.changes().is_empty()
});
}
/// Merges freshly assigned groups into the existing grouping.
///
/// * `existing` — groups from the previous run; not modified.
/// * `new_assignments` — groups covering the new/modified files.
/// * `delta` — what changed since the previous run.
///
/// Steps:
/// 1. Copy `existing` and strip every removed or modified file from it
///    (modified files are expected to come back via `new_assignments`).
/// 2. For each non-empty new group, append its changes to the existing group
///    with the same label (compared case-insensitively via `to_lowercase`),
///    or add it as a new group when no label matches.
/// 3. Drop groups that have no changes left.
///
/// Returns the merged list; existing groups keep their relative order and
/// unmatched new groups are appended after them.
pub fn merge_groups(
    existing: &[SemanticGroup],
    new_assignments: &[SemanticGroup],
    delta: &DiffDelta,
) -> Vec<SemanticGroup> {
    let mut merged: Vec<SemanticGroup> = existing.to_vec();

    let stale: Vec<String> = delta
        .removed_files
        .iter()
        .chain(delta.modified_files.iter())
        .cloned()
        .collect();
    remove_files_from_groups(&mut merged, &stale);

    for new_group in new_assignments {
        let new_changes = new_group.changes();
        if new_changes.is_empty() {
            continue;
        }

        // Lowercase the incoming label once, rather than once per candidate
        // inside the search closure.
        let wanted_label = new_group.label.to_lowercase();
        let existing_pos = merged
            .iter()
            .position(|g| g.label.to_lowercase() == wanted_label);

        match existing_pos {
            Some(pos) => {
                let mut combined = merged[pos].changes();
                combined.extend(new_changes);
                merged[pos].set_changes(combined);
            }
            None => merged.push(new_group.clone()),
        }
    }

    merged.retain(|g| !g.changes().is_empty());
    merged
}
/// Returns the longest prefix of `s` that is at most `max` bytes long and
/// ends on a UTF-8 character boundary.
///
/// Strings that already fit are returned unchanged. When `max` falls inside a
/// multi-byte character, the cut moves back to the start of that character,
/// so the result can be shorter than `max` bytes.
fn truncate(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    // Index 0 is always a boundary, so the search cannot come up empty; the
    // fallback only satisfies the type.
    let cut = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII input is cut at exactly `max` bytes.
    #[test]
    fn test_truncate_ascii() {
        let clipped = truncate("hello", 3);
        assert_eq!(clipped, "hel");
    }

    /// Input shorter than the limit comes back untouched.
    #[test]
    fn test_truncate_shorter_than_max() {
        let clipped = truncate("hi", 10);
        assert_eq!(clipped, "hi");
    }

    /// Each character below is 3 bytes; a limit of 4 lands inside the second
    /// character, so only the first one survives.
    #[test]
    fn test_truncate_cjk_at_boundary_no_panic() {
        let input = "\u{4e16}\u{754c}\u{4f60}\u{597d}";
        assert_eq!(truncate(input, 4), "\u{4e16}");
    }

    /// The emoji occupies bytes 1..5; a limit of 3 lands inside it, so the
    /// cut moves back to just after the leading `a`.
    #[test]
    fn test_truncate_emoji_at_boundary_no_panic() {
        let input = "a🦀b";
        assert_eq!(truncate(input, 3), "a");
    }

    /// A limit equal to the length keeps the whole string.
    #[test]
    fn test_truncate_exact_boundary() {
        let clipped = truncate("hello", 5);
        assert_eq!(clipped, "hello");
    }

    /// A limit of zero yields the empty string.
    #[test]
    fn test_truncate_zero() {
        let clipped = truncate("hello", 0);
        assert_eq!(clipped, "");
    }
}