Skip to main content

aft/compress/
git.rs

1use std::collections::HashSet;
2
3use crate::compress::generic::{dedup_consecutive, middle_truncate, GenericCompressor};
4use crate::compress::Compressor;
5
/// `git status` output of at most this many bytes is only whitespace-trimmed,
/// never summarized.
const STATUS_SHORT_LIMIT: usize = 1024;
/// Maximum number of entries kept per `git status` section before the rest is
/// collapsed into a single "... and N more" line.
const STATUS_KEEP_PER_SECTION: usize = 10;
/// Maximum number of per-file diffs emitted once a diff has more than
/// `DIFF_MAX_HUNKS` hunks in total.
const DIFF_MAX_FILES: usize = 5;
/// Total hunk count above which the `DIFF_MAX_FILES` cap kicks in.
const DIFF_MAX_HUNKS: usize = 20;
/// Number of lines kept after each hunk's `@@` header line.
const HUNK_KEEP_LINES: usize = 30;
/// Number of commits kept from `git log` output.
const LOG_KEEP_COMMITS: usize = 20;
/// Number of lines kept from `git blame` output.
const BLAME_KEEP_LINES: usize = 50;
13
14pub struct GitCompressor;
15
16impl Compressor for GitCompressor {
17    fn matches(&self, command: &str) -> bool {
18        command_head(command).is_some_and(|head| head == "git")
19    }
20
21    fn compress(&self, command: &str, output: &str) -> String {
22        match git_subcommand(command).as_deref() {
23            Some("status") => compress_status(output),
24            Some("diff") => compress_diff(output, false),
25            Some("log") => compress_log(output),
26            Some("show") => compress_diff(output, true),
27            Some("branch") => trim_trailing_lines(&dedup_consecutive(output)),
28            Some("blame") => compress_blame(output),
29            _ => GenericCompressor::compress_output(output),
30        }
31    }
32}
33
/// Returns the first whitespace-delimited token of `command`, or `None` when
/// the command is empty or consists solely of whitespace.
fn command_head(command: &str) -> Option<&str> {
    let mut tokens = command.split_whitespace();
    tokens.next()
}
37
/// Extracts the git subcommand (`status`, `diff`, ...) from a command line.
///
/// Every token up to and including the first literal `git` is skipped. After
/// that, global options are skipped until the first plain word, which is
/// returned. Returns `None` when there is no `git` token or when nothing but
/// options follow it.
///
/// Global options that take their value as a separate argument (for example
/// `-C <path>` or `--git-dir <path>`) also consume the following token, so
/// `git -C repo status` yields `status` rather than `repo`.
fn git_subcommand(command: &str) -> Option<String> {
    // Global git options whose value may be passed as the next argument.
    const OPTIONS_WITH_VALUE: [&str; 8] = [
        "-C",
        "-c",
        "--git-dir",
        "--work-tree",
        "--namespace",
        "--super-prefix",
        "--config-env",
        "--attr-source",
    ];

    let mut tokens = command
        .split_whitespace()
        .skip_while(|token| *token != "git")
        .skip(1);

    while let Some(token) = tokens.next() {
        if OPTIONS_WITH_VALUE.contains(&token) {
            // Discard the option's value so it is not mistaken for the
            // subcommand.
            tokens.next();
            continue;
        }
        // Remaining flags, plus `name=value` style tokens, are never a
        // subcommand.
        if token.starts_with('-') || token.contains('=') {
            continue;
        }
        return Some(token.to_string());
    }
    None
}
54
55fn compress_status(output: &str) -> String {
56    if output.len() <= STATUS_SHORT_LIMIT {
57        return trim_trailing_lines(output);
58    }
59
60    let mut result = Vec::new();
61    let mut section_entries = Vec::new();
62    let mut in_section = false;
63
64    for line in output.lines() {
65        if is_status_section_header(line) {
66            flush_status_entries(&mut result, &mut section_entries);
67            result.push(line.to_string());
68            in_section = true;
69        } else if in_section && is_status_instructional(line) {
70            // Lines like `  (use "git add <file>..." to include in what will be
71            // committed)` come right after the section header in real git
72            // output. They're informational, not entries — pass them through
73            // verbatim WITHOUT resetting `in_section` so the entries that
74            // follow still get aggregated and summarized.
75            result.push(line.to_string());
76        } else if in_section && is_status_entry(line) {
77            section_entries.push(line.to_string());
78        } else {
79            flush_status_entries(&mut result, &mut section_entries);
80            result.push(line.to_string());
81            in_section = false;
82        }
83    }
84    flush_status_entries(&mut result, &mut section_entries);
85
86    trim_trailing_lines(&result.join("\n"))
87}
88
/// Reports whether `line` is one of the known `git status` section titles.
/// Trailing colons are ignored; leading whitespace is not.
fn is_status_section_header(line: &str) -> bool {
    const SECTION_TITLES: [&str; 4] = [
        "Changes to be committed",
        "Changes not staged for commit",
        "Untracked files",
        "Unmerged paths",
    ];

    let title = line.trim_end_matches(':');
    SECTION_TITLES.contains(&title)
}
98
/// Detects the hint lines git prints inside a status section, such as
/// `  (use "git add <file>..." to include in what will be committed)`.
///
/// After leading whitespace is removed, a line counts as a hint when it begins
/// with `(` or with `use `. Callers use this to keep a section open while
/// skipping over the hints that sit between its header and its entries.
fn is_status_instructional(line: &str) -> bool {
    let body = line.trim_start();
    ["(", "use "].iter().any(|prefix| body.starts_with(*prefix))
}
108
/// Reports whether `line` looks like a file entry inside a status section.
///
/// Lines carrying one of the known change labels are entries. So is any other
/// non-blank line, for example a bare untracked path, unless it begins with
/// `(`, `use ` or `no changes` once leading whitespace is removed.
fn is_status_entry(line: &str) -> bool {
    const CHANGE_LABELS: [&str; 9] = [
        "modified:",
        "new file:",
        "deleted:",
        "renamed:",
        "copied:",
        "both modified:",
        "both added:",
        "deleted by us:",
        "deleted by them:",
    ];
    const NON_ENTRY_PREFIXES: [&str; 3] = ["(", "use ", "no changes"];

    let body = line.trim_start();
    if CHANGE_LABELS.iter().any(|label| body.starts_with(*label)) {
        return true;
    }

    let excluded = NON_ENTRY_PREFIXES
        .iter()
        .any(|prefix| body.starts_with(*prefix));
    !body.is_empty() && !excluded
}
125
126fn flush_status_entries(result: &mut Vec<String>, entries: &mut Vec<String>) {
127    if entries.is_empty() {
128        return;
129    }
130
131    let keep = entries.len().min(STATUS_KEEP_PER_SECTION);
132    result.extend(entries.iter().take(keep).cloned());
133    if entries.len() > keep {
134        result.push(format!("... and {} more", entries.len() - keep));
135    }
136    entries.clear();
137}
138
139fn compress_diff(output: &str, keep_commit_header: bool) -> String {
140    let files = split_diff_files(output, keep_commit_header);
141    let total_hunks: usize = files.iter().map(|file| count_hunks(&file.lines)).sum();
142
143    if files.is_empty() || total_hunks <= 2 && output.len() <= 5 * 1024 {
144        return trim_trailing_lines(output);
145    }
146
147    let max_files = if total_hunks > DIFF_MAX_HUNKS {
148        DIFF_MAX_FILES
149    } else {
150        usize::MAX
151    };
152
153    let mut result = Vec::new();
154    let mut emitted_files = 0usize;
155
156    for file in &files {
157        if file.is_diff && emitted_files >= max_files {
158            continue;
159        }
160        result.extend(compress_diff_file(&file.lines));
161        emitted_files += usize::from(file.is_diff);
162    }
163
164    let changed_files = files.iter().filter(|file| file.is_diff).count();
165    if changed_files > emitted_files {
166        result.push(format!(
167            "... and {} more files changed",
168            changed_files - emitted_files
169        ));
170    }
171
172    middle_truncate(
173        &trim_trailing_lines(&result.join("\n")),
174        16 * 1024,
175        7 * 1024,
176        7 * 1024,
177    )
178}
179
/// One contiguous slice of diff output, as produced by `split_diff_files`.
struct DiffFile {
    /// The raw lines of this slice, in their original order.
    lines: Vec<String>,
    /// `true` for a per-file diff; `false` for leading text (such as a commit
    /// header) that was kept as a separate, non-diff section.
    is_diff: bool,
}

/// Reports whether `line` opens a new per-file diff. Besides the regular
/// `diff --git` header this recognizes the combined-diff headers
/// (`diff --cc`, `diff --combined`) that git emits for merges.
fn is_diff_file_header(line: &str) -> bool {
    ["diff --git ", "diff --cc ", "diff --combined "]
        .iter()
        .any(|prefix| line.starts_with(*prefix))
}

/// Splits diff output into per-file chunks, cutting at every file header.
///
/// Text ahead of the first header becomes its own chunk. It is flagged as a
/// non-diff section only when `keep_commit_header` is `true`; otherwise it is
/// treated like a file diff. Returns an empty vector for output with no
/// lines.
fn split_diff_files(output: &str, keep_commit_header: bool) -> Vec<DiffFile> {
    let mut files = Vec::new();
    let mut current = Vec::new();
    let mut current_is_diff = false;

    for line in output.lines() {
        if is_diff_file_header(line) {
            // Close the chunk collected so far before starting the new file.
            if !current.is_empty() {
                files.push(DiffFile {
                    lines: std::mem::take(&mut current),
                    is_diff: current_is_diff,
                });
            }
            current_is_diff = true;
        } else if !keep_commit_header {
            // Without a commit header to preserve, leading text counts as
            // diff content.
            current_is_diff = true;
        }
        current.push(line.to_string());
    }

    if !current.is_empty() {
        files.push(DiffFile {
            lines: current,
            is_diff: current_is_diff,
        });
    }

    files
}
214
215fn compress_diff_file(lines: &[String]) -> Vec<String> {
216    let mut result = Vec::new();
217    let mut index = 0usize;
218
219    while index < lines.len() {
220        let line = &lines[index];
221        if !line.starts_with("@@") {
222            result.push(line.clone());
223            index += 1;
224            continue;
225        }
226
227        let hunk_start = index;
228        index += 1;
229        while index < lines.len() && !lines[index].starts_with("@@") {
230            index += 1;
231        }
232        let hunk = &lines[hunk_start..index];
233        append_hunk(&mut result, hunk);
234    }
235
236    result
237}
238
239fn append_hunk(result: &mut Vec<String>, hunk: &[String]) {
240    if hunk.len() <= HUNK_KEEP_LINES + 1 {
241        result.extend(hunk.iter().cloned());
242        return;
243    }
244
245    result.extend(hunk.iter().take(HUNK_KEEP_LINES + 1).cloned());
246    let remaining = &hunk[HUNK_KEEP_LINES + 1..];
247    let added = remaining
248        .iter()
249        .filter(|line| line.starts_with('+'))
250        .count();
251    let removed = remaining
252        .iter()
253        .filter(|line| line.starts_with('-'))
254        .count();
255    result.push(format!(
256        "... +{} -{} in {} more lines",
257        added,
258        removed,
259        remaining.len()
260    ));
261}
262
/// Counts the hunk headers, i.e. the lines that begin with `@@`.
fn count_hunks(lines: &[String]) -> usize {
    lines
        .iter()
        .fold(0, |hunks, line| hunks + usize::from(line.starts_with("@@")))
}
266
267fn compress_log(output: &str) -> String {
268    let mut commits = 0usize;
269    let mut omitted = 0usize;
270    let mut result = Vec::new();
271    let mut seen_authors = HashSet::new();
272
273    for line in output.lines() {
274        let is_commit = line.starts_with("commit ") || looks_like_oneline_commit(line);
275        if is_commit {
276            commits += 1;
277            if commits > LOG_KEEP_COMMITS {
278                omitted += 1;
279                continue;
280            }
281        }
282
283        if commits > LOG_KEEP_COMMITS {
284            continue;
285        }
286
287        if line.starts_with("Author: ") && !seen_authors.insert(line.to_string()) {
288            continue;
289        }
290
291        result.push(line.to_string());
292    }
293
294    if omitted > 0 {
295        result.push(format!("... {} more commits", omitted));
296    }
297
298    trim_trailing_lines(&result.join("\n"))
299}
300
/// Heuristically detects a `git log --oneline` / `--pretty=oneline` entry: a
/// line whose first space-delimited token is a plausible object name.
///
/// The token must consist of 7 to 64 hexadecimal digits. The upper bound of
/// 64 covers full SHA-256 object names as well as 40-digit SHA-1 ones; the
/// lower bound of 7 filters out most ordinary words. A line without any space
/// is never treated as a commit.
fn looks_like_oneline_commit(line: &str) -> bool {
    let Some((hash, _message)) = line.split_once(' ') else {
        return false;
    };
    (7..=64).contains(&hash.len()) && hash.bytes().all(|byte| byte.is_ascii_hexdigit())
}
307
308fn compress_blame(output: &str) -> String {
309    let total = output.lines().count();
310    if total <= BLAME_KEEP_LINES {
311        return trim_trailing_lines(output);
312    }
313
314    let mut result: Vec<String> = output
315        .lines()
316        .take(BLAME_KEEP_LINES)
317        .map(ToString::to_string)
318        .collect();
319    result.push(format!("... {} more blame lines", total - BLAME_KEEP_LINES));
320    result.join("\n")
321}
322
/// Strips trailing whitespace from every line of `input` and rejoins the
/// lines with `\n`. The final line terminator, if any, is not reproduced;
/// blank lines are otherwise preserved.
fn trim_trailing_lines(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());
    for (position, line) in input.lines().enumerate() {
        if position > 0 {
            cleaned.push('\n');
        }
        cleaned.push_str(line.trim_end());
    }
    cleaned
}