1pub mod llm;
2
3use crate::diff::DiffData;
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6use std::collections::hash_map::DefaultHasher;
7use std::hash::{Hash, Hasher};
8
/// Top-level payload deserialized from the LLM grouping call: the full set
/// of semantic groups proposed for the current diff.
#[derive(Debug, Clone, Deserialize)]
pub struct GroupingResponse {
    pub groups: Vec<SemanticGroup>,
}
14
/// One semantically related cluster of diff changes, as labeled by the LLM.
///
/// Carries two representations of its contents: the preferred per-file
/// `changes` list (with optional hunk indices) and a legacy flat `files`
/// list. Both are private; read through `changes()`, which handles the
/// fallback.
#[derive(Debug, Clone, Deserialize)]
pub struct SemanticGroup {
    /// Short human-readable name for the group.
    pub label: String,
    /// Longer explanation; deserialized but currently unused.
    #[serde(default)]
    #[allow(dead_code)]
    pub description: String,
    /// Preferred representation: one entry per file, optionally scoped to
    /// specific hunks.
    #[serde(default)]
    changes: Vec<GroupedChange>,
    /// Legacy representation: bare file paths with no hunk information.
    /// Consulted only when `changes` is empty — see `changes()`.
    #[serde(default)]
    files: Vec<String>,
}
30
31impl SemanticGroup {
32 pub fn new(label: String, description: String, changes: Vec<GroupedChange>) -> Self {
34 Self {
35 label,
36 description,
37 changes,
38 files: vec![],
39 }
40 }
41
42 pub fn set_changes(&mut self, changes: Vec<GroupedChange>) {
44 self.changes = changes;
45 self.files.clear();
46 }
47
48 pub fn changes(&self) -> Vec<GroupedChange> {
50 if !self.changes.is_empty() {
51 return self.changes.clone();
52 }
53 self.files
55 .iter()
56 .map(|f| GroupedChange {
57 file: f.clone(),
58 hunks: vec![],
59 })
60 .collect()
61 }
62}
63
/// A single file's contribution to a semantic group.
#[derive(Debug, Clone, Deserialize)]
pub struct GroupedChange {
    /// Path of the changed file — presumably matching the `b/`-stripped
    /// paths used elsewhere in this module; verify against the LLM prompt.
    pub file: String,
    /// Indices into the file's hunk list. An empty list is treated as
    /// "the whole file" by `SemanticGroup::changes()` and
    /// `normalize_hunk_indices`.
    #[serde(default)]
    pub hunks: Vec<usize>,
}
72
/// Lifecycle of an LLM grouping request.
#[derive(Debug, Clone, PartialEq)]
pub enum GroupingStatus {
    /// No grouping has been requested yet.
    Idle,
    /// A grouping request is in flight.
    Loading,
    /// Grouping finished successfully.
    Done,
    /// Grouping failed; the payload is the error message.
    Error(String),
}
85
/// Soft cap on the generated prompt summary. NOTE: despite the name this
/// is measured in bytes (`String::len`), not characters; once the running
/// output exceeds it, remaining files are omitted from the summary.
const MAX_SUMMARY_CHARS: usize = 8000;
100
101pub fn hunk_summaries(diff_data: &DiffData) -> String {
102 let mut out = String::new();
103 for f in &diff_data.files {
104 let path = f.target_file.trim_start_matches("b/");
105 let status = if f.is_rename {
106 format!("renamed from {}", f.source_file.trim_start_matches("a/"))
107 } else if f.added_count > 0 && f.removed_count == 0 {
108 "added".to_string()
109 } else if f.removed_count > 0 && f.added_count == 0 {
110 "deleted".to_string()
111 } else {
112 "modified".to_string()
113 };
114 out.push_str(&format!(
115 "FILE: {} ({}, +{} -{})\n",
116 path, status, f.added_count, f.removed_count
117 ));
118
119 for (hi, hunk) in f.hunks.iter().enumerate() {
120 out.push_str(&format!(" HUNK {}: {}\n", hi, hunk.header));
121
122 if out.len() < MAX_SUMMARY_CHARS {
124 let mut shown = 0;
125 for line in &hunk.lines {
126 if shown >= 4 {
127 out.push_str(" ...\n");
128 break;
129 }
130 match line.line_type {
131 crate::diff::LineType::Added => {
132 out.push_str(&format!(" + {}\n", truncate(&line.content, 60)));
133 shown += 1;
134 }
135 crate::diff::LineType::Removed => {
136 out.push_str(&format!(" - {}\n", truncate(&line.content, 60)));
137 shown += 1;
138 }
139 _ => {}
140 }
141 }
142 }
143 }
144
145 if out.len() >= MAX_SUMMARY_CHARS {
146 out.push_str("... (remaining files omitted for brevity)\n");
147 break;
148 }
149 }
150 out
151}
152
153pub fn compute_file_hash(file: &crate::diff::DiffFile) -> u64 {
156 let mut hasher = DefaultHasher::new();
157 for hunk in &file.hunks {
158 hunk.header.hash(&mut hasher);
159 for line in &hunk.lines {
160 let discriminant: u8 = match line.line_type {
162 crate::diff::LineType::Added => 0,
163 crate::diff::LineType::Removed => 1,
164 crate::diff::LineType::Context => 2,
165 };
166 discriminant.hash(&mut hasher);
167 line.content.hash(&mut hasher);
168 }
169 }
170 hasher.finish()
171}
172
173pub fn compute_all_file_hashes(diff_data: &DiffData) -> HashMap<String, u64> {
175 diff_data
176 .files
177 .iter()
178 .map(|f| {
179 let path = f.target_file.trim_start_matches("b/").to_string();
180 (path, compute_file_hash(f))
181 })
182 .collect()
183}
184
/// Difference between two snapshots of per-file diff hashes
/// (see `compute_diff_delta`).
#[derive(Debug, Clone, Serialize)]
pub struct DiffDelta {
    /// Paths present now but absent from the previous snapshot.
    pub new_files: Vec<String>,
    /// Paths present previously but gone now.
    pub removed_files: Vec<String>,
    /// Paths present in both snapshots whose content hash changed.
    pub modified_files: Vec<String>,
    /// Paths present in both snapshots with identical hashes.
    pub unchanged_files: Vec<String>,
}
197
198impl DiffDelta {
199 pub fn has_changes(&self) -> bool {
200 !self.new_files.is_empty()
201 || !self.removed_files.is_empty()
202 || !self.modified_files.is_empty()
203 }
204
205 pub fn is_only_removals(&self) -> bool {
206 self.new_files.is_empty()
207 && self.modified_files.is_empty()
208 && !self.removed_files.is_empty()
209 }
210}
211
212pub fn compute_diff_delta(
214 new_hashes: &HashMap<String, u64>,
215 previous_hashes: &HashMap<String, u64>,
216) -> DiffDelta {
217 let mut new_files = Vec::new();
218 let mut modified_files = Vec::new();
219 let mut unchanged_files = Vec::new();
220
221 for (path, &new_hash) in new_hashes {
222 match previous_hashes.get(path) {
223 None => new_files.push(path.clone()),
224 Some(&prev_hash) if prev_hash != new_hash => modified_files.push(path.clone()),
225 _ => unchanged_files.push(path.clone()),
226 }
227 }
228
229 let removed_files = previous_hashes
230 .keys()
231 .filter(|p| !new_hashes.contains_key(*p))
232 .cloned()
233 .collect();
234
235 DiffDelta {
236 new_files,
237 removed_files,
238 modified_files,
239 unchanged_files,
240 }
241}
242
243pub fn incremental_hunk_summaries(
256 diff_data: &DiffData,
257 delta: &DiffDelta,
258 existing_groups: &[SemanticGroup],
259) -> String {
260 let mut out = String::new();
261
262 if !existing_groups.is_empty() {
264 out.push_str(
265 "EXISTING GROUPS (for context \u{2014} assign new changes to these or create new groups):\n",
266 );
267 for (i, group) in existing_groups.iter().enumerate() {
268 let changes = group.changes();
269 let file_list: Vec<&str> = changes.iter().map(|c| c.file.as_str()).collect();
270 out.push_str(&format!(
271 "{}. \"{}\" \u{2014} files: {}\n",
272 i + 1,
273 group.label,
274 file_list.join(", ")
275 ));
276 }
277 out.push('\n');
278 }
279
280 out.push_str("NEW/MODIFIED FILES TO GROUP:\n");
281
282 let include: std::collections::HashSet<&str> = delta
284 .new_files
285 .iter()
286 .chain(delta.modified_files.iter())
287 .map(|s| s.as_str())
288 .collect();
289
290 for f in &diff_data.files {
291 let path = f.target_file.trim_start_matches("b/");
292 if !include.contains(path) {
293 continue;
294 }
295
296 let status = if f.is_rename {
297 format!("renamed from {}", f.source_file.trim_start_matches("a/"))
298 } else if f.added_count > 0 && f.removed_count == 0 {
299 "added".to_string()
300 } else if f.removed_count > 0 && f.added_count == 0 {
301 "deleted".to_string()
302 } else {
303 "modified".to_string()
304 };
305 out.push_str(&format!(
306 "FILE: {} ({}, +{} -{})\n",
307 path, status, f.added_count, f.removed_count
308 ));
309
310 for (hi, hunk) in f.hunks.iter().enumerate() {
311 out.push_str(&format!(" HUNK {}: {}\n", hi, hunk.header));
312
313 if out.len() < MAX_SUMMARY_CHARS {
314 let mut shown = 0;
315 for line in &hunk.lines {
316 if shown >= 4 {
317 out.push_str(" ...\n");
318 break;
319 }
320 match line.line_type {
321 crate::diff::LineType::Added => {
322 out.push_str(&format!(" + {}\n", truncate(&line.content, 60)));
323 shown += 1;
324 }
325 crate::diff::LineType::Removed => {
326 out.push_str(&format!(" - {}\n", truncate(&line.content, 60)));
327 shown += 1;
328 }
329 _ => {}
330 }
331 }
332 }
333 }
334
335 if out.len() >= MAX_SUMMARY_CHARS {
336 out.push_str("... (remaining files omitted for brevity)\n");
337 break;
338 }
339 }
340
341 out
342}
343
344pub fn normalize_hunk_indices(groups: &mut [SemanticGroup], diff_data: &DiffData) {
347 let hunk_counts: HashMap<String, usize> = diff_data
349 .files
350 .iter()
351 .map(|f| {
352 let path = f.target_file.trim_start_matches("b/").to_string();
353 (path, f.hunks.len())
354 })
355 .collect();
356
357 for group in groups.iter_mut() {
358 let mut updated = group.changes();
359 for change in updated.iter_mut() {
360 if change.hunks.is_empty() {
361 if let Some(&count) = hunk_counts.get(&change.file) {
362 if count > 1 {
363 change.hunks = (0..count).collect();
364 }
365 }
366 }
367 }
368 group.set_changes(updated);
369 }
370}
371
372pub fn remove_files_from_groups(groups: &mut Vec<SemanticGroup>, files_to_remove: &[String]) {
375 if files_to_remove.is_empty() {
376 return;
377 }
378 let remove_set: std::collections::HashSet<&str> =
379 files_to_remove.iter().map(|s| s.as_str()).collect();
380
381 groups.retain_mut(|group| {
382 let filtered: Vec<GroupedChange> = group
383 .changes()
384 .into_iter()
385 .filter(|c| !remove_set.contains(c.file.as_str()))
386 .collect();
387 group.set_changes(filtered);
388 !group.changes().is_empty()
389 });
390}
391
392pub fn merge_groups(
402 existing: &[SemanticGroup],
403 new_assignments: &[SemanticGroup],
404 delta: &DiffDelta,
405) -> Vec<SemanticGroup> {
406 let mut merged: Vec<SemanticGroup> = existing.to_vec();
407
408 let stale: Vec<String> = delta
410 .removed_files
411 .iter()
412 .chain(delta.modified_files.iter())
413 .cloned()
414 .collect();
415 remove_files_from_groups(&mut merged, &stale);
416
417 for new_group in new_assignments {
419 let new_changes = new_group.changes();
420 if new_changes.is_empty() {
421 continue;
422 }
423
424 let existing_pos = merged
426 .iter()
427 .position(|g| g.label.to_lowercase() == new_group.label.to_lowercase());
428
429 if let Some(pos) = existing_pos {
430 let mut combined = merged[pos].changes();
431 combined.extend(new_changes);
432 merged[pos].set_changes(combined);
433 } else {
434 merged.push(new_group.clone());
435 }
436 }
437
438 merged.retain(|g| !g.changes().is_empty());
440
441 merged
442}
443
/// Truncate `s` to at most `max` bytes without splitting a UTF-8
/// character: the cut backs off to the nearest char boundary at or below
/// `max`. Strings already within the limit are returned unchanged.
fn truncate(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    // Index 0 is always a char boundary, so this search always succeeds.
    let cut = (0..=max)
        .rev()
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(0);
    &s[..cut]
}
458
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_truncate_ascii() {
        assert_eq!(truncate("hello", 3), "hel");
    }

    #[test]
    fn test_truncate_shorter_than_max() {
        assert_eq!(truncate("hi", 10), "hi");
    }

    #[test]
    fn test_truncate_cjk_at_boundary_no_panic() {
        // Each CJK char is 3 bytes; a 4-byte cut must back off to 3.
        let s = "\u{4e16}\u{754c}\u{4f60}\u{597d}";
        let result = truncate(s, 4);
        assert_eq!(result, "\u{4e16}");
    }

    #[test]
    fn test_truncate_emoji_at_boundary_no_panic() {
        // The crab emoji spans bytes 1..5; cutting at 3 lands mid-char
        // and must back off to index 1.
        let s = "a🦀b";
        let result = truncate(s, 3);
        assert_eq!(result, "a");
    }

    #[test]
    fn test_truncate_exact_boundary() {
        assert_eq!(truncate("hello", 5), "hello");
    }

    #[test]
    fn test_truncate_zero() {
        assert_eq!(truncate("hello", 0), "");
    }
}