Skip to main content

difflore_core/review/
diff_context.rs

//! Approximately token-aware (character-budgeted) packing for already
//! collected PR diffs.
//!
//! This module deliberately starts after the existing PR fetch and
//! merge-base diff step. It never shells out, fetches refs, or decides which
//! commits belong to a PR; callers pass file records that were already
//! produced from a merge-base diff.
7
8use std::cmp::Ordering;
9
/// Ordering strategy applied to diff records before they are fitted into the
/// character budget.
///
/// Both modes rank higher `relevance` first and non-deleted files ahead of
/// deleted ones; they differ only in the next tie-breaker.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum DiffContextMode {
    /// After relevance, prefer smaller patches so that as many files as
    /// possible are covered. This is the default mode.
    #[default]
    ReviewExtraction,
    /// After relevance, prefer patches with more changed lines; patch size
    /// (smaller first) is only used to break remaining ties.
    FixPr,
}
21
/// Kind of change a diff record represents for its file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DiffContextFileChange {
    Added,
    Modified,
    Renamed,
    Deleted,
}

impl DiffContextFileChange {
    /// Returns the lowercase label for this change kind, as used in summary
    /// text.
    pub const fn as_str(self) -> &'static str {
        match self {
            DiffContextFileChange::Added => "added",
            DiffContextFileChange::Modified => "modified",
            DiffContextFileChange::Renamed => "renamed",
            DiffContextFileChange::Deleted => "deleted",
        }
    }
}
41
42/// One file-level diff record produced by an upstream merge-base diff.
43#[derive(Debug, Clone, Copy)]
44pub struct DiffContextFile<'a> {
45    pub path: &'a str,
46    pub patch: &'a str,
47    /// Higher values are packed earlier. Use zero when no external
48    /// relevance signal is available.
49    pub relevance: u16,
50    pub change: DiffContextFileChange,
51}
52
53impl<'a> DiffContextFile<'a> {
54    pub const fn new(path: &'a str, patch: &'a str) -> Self {
55        Self {
56            path,
57            patch,
58            relevance: 0,
59            change: DiffContextFileChange::Modified,
60        }
61    }
62}
63
64/// Packing options.
65#[derive(Debug, Clone, Copy, Default)]
66pub struct DiffContextOptions {
67    /// Maximum character count for the packed diff text. This is deliberately
68    /// a character budget, not a real tokenizer.
69    pub char_budget: Option<usize>,
70    pub mode: DiffContextMode,
71}
72
73/// A file included in the packed context.
74#[derive(Debug, Clone, PartialEq, Eq)]
75pub struct PackedDiffFile {
76    pub path: String,
77    pub change: DiffContextFileChange,
78    pub relevance: u16,
79    pub original_chars: usize,
80    pub included_chars: usize,
81    pub additions: usize,
82    pub deletions: usize,
83    pub truncated: bool,
84}
85
/// Reason a file is listed in the summaries instead of (or in addition to)
/// being fully included in the packed text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DiffContextSummaryReason {
    /// The file was deleted, so its patch is not rendered.
    DeletedFile,
    /// The record had an empty path or an empty patch.
    EmptyPatch,
    /// Nothing of the patch fit into the remaining budget.
    OmittedForBudget,
    /// Only a compact version of the patch fit into the remaining budget.
    TruncatedForBudget,
}
94
95/// Summary for a deleted, omitted, or truncated file.
96#[derive(Debug, Clone, PartialEq, Eq)]
97pub struct DiffContextSummary {
98    pub path: String,
99    pub change: DiffContextFileChange,
100    pub reason: DiffContextSummaryReason,
101    pub original_chars: usize,
102    pub included_chars: usize,
103    pub additions: usize,
104    pub deletions: usize,
105    pub summary: String,
106}
107
108/// Packed diff text plus bookkeeping for files that were not fully included.
109#[derive(Debug, Clone, PartialEq, Eq)]
110pub struct PackedDiffContext {
111    pub text: String,
112    pub included_files: Vec<PackedDiffFile>,
113    pub summaries: Vec<DiffContextSummary>,
114    pub char_budget: Option<usize>,
115    pub packed_chars: usize,
116    pub original_chars: usize,
117}
118
119/// Pack file-level diff records into a deterministic context block.
120///
121/// The algorithm is intentionally small:
122/// 1. Sort records by mode-specific priority, using relevance and patch size.
123/// 2. Include full file patches while they fit the optional character budget.
124/// 3. When a high-priority file does not fit, include a compact patch made of
125///    file/hunk headers, changed lines, and adjacent context lines.
126/// 4. Return summaries for deleted, omitted, empty, and truncated records.
127pub fn pack_diff_context(
128    files: &[DiffContextFile<'_>],
129    options: DiffContextOptions,
130) -> PackedDiffContext {
131    let mut ordered: Vec<(usize, &DiffContextFile<'_>)> = files.iter().enumerate().collect();
132    ordered.sort_by(|(a_idx, a), (b_idx, b)| compare_files(a, *a_idx, b, *b_idx, options.mode));
133
134    let mut text = String::new();
135    let mut included_files = Vec::new();
136    let mut summaries = Vec::new();
137    let mut packed_chars = 0usize;
138    let mut original_chars = 0usize;
139
140    for (_idx, file) in ordered {
141        let path = file.path.trim();
142        let patch = file.patch.trim_end();
143        let change = effective_change(file);
144        let patch_chars = char_count(patch);
145        let (additions, deletions) = count_changed_lines(patch);
146        original_chars = original_chars.saturating_add(patch_chars);
147
148        if path.is_empty() || patch.trim().is_empty() {
149            summaries.push(build_summary(
150                path,
151                change,
152                DiffContextSummaryReason::EmptyPatch,
153                patch_chars,
154                0,
155                additions,
156                deletions,
157            ));
158            continue;
159        }
160
161        if change == DiffContextFileChange::Deleted {
162            summaries.push(build_summary(
163                path,
164                change,
165                DiffContextSummaryReason::DeletedFile,
166                patch_chars,
167                0,
168                additions,
169                deletions,
170            ));
171            continue;
172        }
173
174        let section = render_file_section(path, patch);
175        let section_chars = char_count(&section);
176        if fits_budget(packed_chars, section_chars, options.char_budget) {
177            text.push_str(&section);
178            packed_chars = packed_chars.saturating_add(section_chars);
179            included_files.push(PackedDiffFile {
180                path: path.to_owned(),
181                change,
182                relevance: file.relevance,
183                original_chars: patch_chars,
184                included_chars: section_chars,
185                additions,
186                deletions,
187                truncated: false,
188            });
189            continue;
190        }
191
192        let Some(char_budget) = options.char_budget else {
193            continue;
194        };
195        let remaining = char_budget.saturating_sub(packed_chars);
196        if let Some(compact_section) = render_compact_file_section(path, patch, remaining) {
197            let compact_chars = char_count(&compact_section);
198            text.push_str(&compact_section);
199            packed_chars = packed_chars.saturating_add(compact_chars);
200            included_files.push(PackedDiffFile {
201                path: path.to_owned(),
202                change,
203                relevance: file.relevance,
204                original_chars: patch_chars,
205                included_chars: compact_chars,
206                additions,
207                deletions,
208                truncated: true,
209            });
210            summaries.push(build_summary(
211                path,
212                change,
213                DiffContextSummaryReason::TruncatedForBudget,
214                patch_chars,
215                compact_chars,
216                additions,
217                deletions,
218            ));
219        } else {
220            summaries.push(build_summary(
221                path,
222                change,
223                DiffContextSummaryReason::OmittedForBudget,
224                patch_chars,
225                0,
226                additions,
227                deletions,
228            ));
229        }
230    }
231
232    PackedDiffContext {
233        text,
234        included_files,
235        summaries,
236        char_budget: options.char_budget,
237        packed_chars,
238        original_chars,
239    }
240}
241
242fn compare_files(
243    a: &DiffContextFile<'_>,
244    a_idx: usize,
245    b: &DiffContextFile<'_>,
246    b_idx: usize,
247    mode: DiffContextMode,
248) -> Ordering {
249    let a_change = effective_change(a);
250    let b_change = effective_change(b);
251    let a_active_rank = active_rank(a_change);
252    let b_active_rank = active_rank(b_change);
253    let a_chars = char_count(a.patch.trim_end());
254    let b_chars = char_count(b.patch.trim_end());
255    let a_changed = changed_line_total(a.patch);
256    let b_changed = changed_line_total(b.patch);
257    let a_path = a.path.trim();
258    let b_path = b.path.trim();
259
260    match mode {
261        DiffContextMode::ReviewExtraction => b
262            .relevance
263            .cmp(&a.relevance)
264            .then_with(|| a_active_rank.cmp(&b_active_rank))
265            .then_with(|| a_chars.cmp(&b_chars))
266            .then_with(|| a_path.cmp(b_path))
267            .then_with(|| a_idx.cmp(&b_idx)),
268        DiffContextMode::FixPr => b
269            .relevance
270            .cmp(&a.relevance)
271            .then_with(|| a_active_rank.cmp(&b_active_rank))
272            .then_with(|| b_changed.cmp(&a_changed))
273            .then_with(|| a_chars.cmp(&b_chars))
274            .then_with(|| a_path.cmp(b_path))
275            .then_with(|| a_idx.cmp(&b_idx)),
276    }
277}
278
279const fn active_rank(change: DiffContextFileChange) -> u8 {
280    match change {
281        DiffContextFileChange::Deleted => 1,
282        DiffContextFileChange::Added
283        | DiffContextFileChange::Modified
284        | DiffContextFileChange::Renamed => 0,
285    }
286}
287
288fn effective_change(file: &DiffContextFile<'_>) -> DiffContextFileChange {
289    if file.change == DiffContextFileChange::Deleted || patch_indicates_deleted_file(file.patch) {
290        DiffContextFileChange::Deleted
291    } else {
292        file.change
293    }
294}
295
/// Reports whether adding `added_chars` to `current_chars` stays within
/// `budget`. A missing budget always fits; the sum saturates instead of
/// overflowing.
fn fits_budget(current_chars: usize, added_chars: usize, budget: Option<usize>) -> bool {
    match budget {
        None => true,
        Some(limit) => current_chars.saturating_add(added_chars) <= limit,
    }
}
299
/// Renders the full section for one file: a `## File:` heading followed by
/// the patch (trailing whitespace trimmed) inside a fenced `diff` block.
fn render_file_section(path: &str, patch: &str) -> String {
    let body = patch.trim_end();
    format!("\n\n## File: {path}\n\n```diff\n{body}\n```\n")
}
309
310fn render_compact_file_section(path: &str, patch: &str, max_chars: usize) -> Option<String> {
311    const TRUNCATED_MARKER: &str = "... [diff context truncated]\n";
312    let prefix = format!("\n\n## File: {path}\n\n```diff\n");
313    let suffix = "```\n";
314    let separator = "\n";
315    let overhead = char_count(&prefix)
316        .saturating_add(char_count(separator))
317        .saturating_add(char_count(TRUNCATED_MARKER))
318        .saturating_add(char_count(suffix));
319    if max_chars <= overhead {
320        return None;
321    }
322
323    let patch_budget = max_chars.saturating_sub(overhead);
324    let compact_patch = compact_patch_lines(patch, patch_budget);
325    if compact_patch.trim().is_empty() {
326        return None;
327    }
328
329    let mut section = prefix;
330    section.push_str(compact_patch.trim_end());
331    section.push_str(separator);
332    section.push_str(TRUNCATED_MARKER);
333    section.push_str(suffix);
334
335    (char_count(&section) <= max_chars).then_some(section)
336}
337
338fn compact_patch_lines(patch: &str, max_chars: usize) -> String {
339    if max_chars == 0 {
340        return String::new();
341    }
342
343    let lines: Vec<&str> = patch.trim_end().lines().collect();
344    if lines.is_empty() {
345        return String::new();
346    }
347
348    let mut keep = vec![false; lines.len()];
349    for (idx, line) in lines.iter().enumerate() {
350        if is_key_patch_line(line) {
351            keep[idx] = true;
352            if idx > 0 && is_context_line(lines[idx - 1]) {
353                keep[idx - 1] = true;
354            }
355            if idx + 1 < lines.len() && is_context_line(lines[idx + 1]) {
356                keep[idx + 1] = true;
357            }
358        }
359    }
360
361    if !keep.iter().any(|keep_line| *keep_line) {
362        return take_chars(patch.trim(), max_chars);
363    }
364
365    let mut out = String::new();
366    let mut out_chars = 0usize;
367    let mut skipped = false;
368    let mut included_any = false;
369
370    for (idx, line) in lines.iter().enumerate() {
371        if !keep[idx] {
372            skipped = true;
373            continue;
374        }
375
376        if skipped && included_any && try_push_line(&mut out, &mut out_chars, "...", max_chars) {
377            skipped = false;
378        }
379
380        if try_push_line(&mut out, &mut out_chars, line, max_chars) {
381            included_any = true;
382            continue;
383        }
384
385        if !included_any {
386            push_partial_line(&mut out, &mut out_chars, line, max_chars);
387        }
388        break;
389    }
390
391    out.trim_end().to_owned()
392}
393
394fn try_push_line(out: &mut String, out_chars: &mut usize, line: &str, max_chars: usize) -> bool {
395    let needed = char_count(line).saturating_add(1);
396    if out_chars.saturating_add(needed) > max_chars {
397        return false;
398    }
399    out.push_str(line);
400    out.push('\n');
401    *out_chars = out_chars.saturating_add(needed);
402    true
403}
404
405fn push_partial_line(out: &mut String, out_chars: &mut usize, line: &str, max_chars: usize) {
406    let remaining = max_chars.saturating_sub(*out_chars);
407    if remaining == 0 {
408        return;
409    }
410    let line_part = if remaining > 1 {
411        take_chars(line, remaining - 1)
412    } else {
413        String::new()
414    };
415    out.push_str(&line_part);
416    if remaining > 1 {
417        out.push('\n');
418    }
419    *out_chars = max_chars;
420}
421
422fn is_key_patch_line(line: &str) -> bool {
423    line.starts_with("diff --git ")
424        || line.starts_with("index ")
425        || line.starts_with("old mode ")
426        || line.starts_with("new mode ")
427        || line.starts_with("new file mode ")
428        || line.starts_with("deleted file mode ")
429        || line.starts_with("similarity index ")
430        || line.starts_with("rename from ")
431        || line.starts_with("rename to ")
432        || line.starts_with("--- ")
433        || line.starts_with("+++ ")
434        || line.starts_with("@@ ")
435        || line.starts_with("Binary files ")
436        || is_changed_line(line)
437}
438
/// Reports whether `line` is an unchanged context line, i.e. its first
/// character is a space.
fn is_context_line(line: &str) -> bool {
    matches!(line.as_bytes().first(), Some(b' '))
}
442
/// Reports whether `line` looks like an added or removed line: it starts with
/// `+` or `-` but not with the `+++` / `---` file-header markers.
fn is_changed_line(line: &str) -> bool {
    match line.as_bytes().first() {
        Some(b'+') => !line.starts_with("+++"),
        Some(b'-') => !line.starts_with("---"),
        _ => false,
    }
}
447
/// Counts the added and removed lines in `patch`, returned as
/// `(additions, deletions)`.
///
/// Inside a hunk body (tracked via the lengths in its `@@ -a,b +c,d @@`
/// header) every line starting with `+` or `-` is a change, even when the
/// content itself starts with `++` or `--` (for example a removed SQL comment
/// shows up as `--- comment`). Outside hunks, and for patches without
/// parseable hunk headers, a prefix heuristic is used: `+` / `-` lines count
/// unless they start with the `+++` / `---` file-header markers.
fn count_changed_lines(patch: &str) -> (usize, usize) {
    /// Length of one side of a hunk range (`start` or `start,len`).
    fn range_len(range: &str) -> Option<usize> {
        match range.split_once(',') {
            Some((start, len)) => {
                start.parse::<usize>().ok()?;
                len.parse().ok()
            }
            // A bare start line means the range covers exactly one line.
            None => {
                range.parse::<usize>().ok()?;
                Some(1)
            }
        }
    }

    /// Old-side and new-side lengths from a `@@ -a,b +c,d @@` hunk header.
    fn parse_hunk_lengths(line: &str) -> Option<(usize, usize)> {
        let rest = line.strip_prefix("@@ -")?;
        let (ranges, _) = rest.split_once(" @@")?;
        let (old_range, new_range) = ranges.split_once(" +")?;
        Some((range_len(old_range)?, range_len(new_range)?))
    }

    let mut additions = 0usize;
    let mut deletions = 0usize;
    // Lines still expected on each side of the current hunk body.
    let mut old_left = 0usize;
    let mut new_left = 0usize;

    for line in patch.lines() {
        if old_left > 0 || new_left > 0 {
            match line.as_bytes().first() {
                Some(b'+') => {
                    additions = additions.saturating_add(1);
                    new_left = new_left.saturating_sub(1);
                    continue;
                }
                Some(b'-') => {
                    deletions = deletions.saturating_add(1);
                    old_left = old_left.saturating_sub(1);
                    continue;
                }
                // Context line; some tools strip the lone space of an empty one.
                Some(b' ') | None => {
                    old_left = old_left.saturating_sub(1);
                    new_left = new_left.saturating_sub(1);
                    continue;
                }
                // "\ No newline at end of file" belongs to neither side.
                Some(b'\\') => continue,
                // Anything else cannot be hunk content: the hunk ended early.
                Some(_) => {
                    old_left = 0;
                    new_left = 0;
                }
            }
        }

        if let Some((old_len, new_len)) = parse_hunk_lengths(line) {
            old_left = old_len;
            new_left = new_len;
            continue;
        }

        // Outside a hunk body: prefix heuristic that skips file headers.
        if line.starts_with('+') && !line.starts_with("+++") {
            additions = additions.saturating_add(1);
        } else if line.starts_with('-') && !line.starts_with("---") {
            deletions = deletions.saturating_add(1);
        }
    }

    (additions, deletions)
}
460
461fn changed_line_total(patch: &str) -> usize {
462    let (additions, deletions) = count_changed_lines(patch);
463    additions.saturating_add(deletions)
464}
465
/// Reports whether the patch headers mark the file as deleted, via a
/// `deleted file mode` line or a `+++ /dev/null` new-side header.
///
/// Only lines outside hunk bodies are inspected. Hunk content can
/// legitimately contain the text `+++ /dev/null` (an added line whose content
/// is `++ /dev/null`, or a context line in a diff fixture), and matching it
/// would classify a live file as deleted. A `diff --git` line starts a new
/// header section.
fn patch_indicates_deleted_file(patch: &str) -> bool {
    let mut in_hunk = false;
    for line in patch.lines() {
        if line.starts_with("diff --git ") {
            in_hunk = false;
        } else if line.starts_with("@@") {
            in_hunk = true;
            continue;
        }
        if in_hunk {
            continue;
        }
        // `trim_end` tolerates trailing whitespace without letting an indented
        // (context) line masquerade as a header.
        if line.trim_end() == "+++ /dev/null" || line.starts_with("deleted file mode ") {
            return true;
        }
    }
    false
}
471
472fn build_summary(
473    path: &str,
474    change: DiffContextFileChange,
475    reason: DiffContextSummaryReason,
476    original_chars: usize,
477    included_chars: usize,
478    additions: usize,
479    deletions: usize,
480) -> DiffContextSummary {
481    let reason_text = match reason {
482        DiffContextSummaryReason::DeletedFile => "summarized because the file was deleted",
483        DiffContextSummaryReason::EmptyPatch => "omitted because the patch was empty",
484        DiffContextSummaryReason::OmittedForBudget => {
485            "deferred because the char budget was exhausted"
486        }
487        DiffContextSummaryReason::TruncatedForBudget => {
488            "partially included with key patch context because the full patch exceeded budget"
489        }
490    };
491    let summary = format!(
492        "{} ({}, +{}, -{}, {} chars): {}",
493        path,
494        change.as_str(),
495        additions,
496        deletions,
497        original_chars,
498        reason_text
499    );
500
501    DiffContextSummary {
502        path: path.to_owned(),
503        change,
504        reason,
505        original_chars,
506        included_chars,
507        additions,
508        deletions,
509        summary,
510    }
511}
512
/// Number of Unicode scalar values (`char`s) in `s`; this is the unit all
/// budgets in this module are measured in.
fn char_count(s: &str) -> usize {
    s.char_indices().count()
}
516
/// Returns an owned copy of the first `max_chars` characters of `s`, or all
/// of `s` when it is shorter than that.
fn take_chars(s: &str, max_chars: usize) -> String {
    // Byte offset where the first character beyond the limit begins, if any.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => s[..cut].to_owned(),
        None => s.to_owned(),
    }
}