use forge::signal::compactor;
use once_cell::sync::Lazy;
use regex::Regex;
/// Matches whole `git status` advice lines: `(use "git ...` hints, `hint:`
/// lines, and the `nothing to commit` / `no changes added` footers.
///
/// The pattern runs through the end of the line (and its newline), so replacing
/// a match with `""` removes the entire line instead of leaving the tail of the
/// hint behind. Leading `\s*` may also swallow blank lines directly above a hint.
static GIT_HINT_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?m)^\s*(\(use "git|hint:|nothing to commit|no changes added).*\n?"#).unwrap()
});
/// Matches whole transfer-progress lines such as `Counting objects: ...`,
/// `remote: Compressing objects: ...` or `Resolving deltas: ...`.
///
/// The optional `remote: ` prefix is accepted for every phase, and the phases
/// cover `Enumerating`, `Counting`, `Compressing`, `Receiving`, `Unpacking`,
/// `Resolving` and `Writing objects:` plus `Resolving deltas:`.
static GIT_OBJECT_COUNT_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r"(?m)^(?:remote: )?(?:(?:Enumerating|Counting|Compressing|Receiving|Unpacking|Resolving|Writing) objects|Resolving deltas):.*\n?",
    )
    .unwrap()
});
/// Matches whole `Delta compression ...` lines, with or without a `remote: `
/// prefix.
///
/// Both `Delta` and `delta` are accepted so the capitalised form is stripped
/// as well as the lowercase one.
static GIT_DELTA_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?m)^(remote: )?[Dd]elta compression.*\n?").unwrap());
/// Matches a diff extended-header line of the form `index <hex>..<hex>`,
/// optionally followed by a file mode (` 100644`), including its newline.
///
/// The mode is optional so that index lines written without one (the mode is
/// then reported on separate header lines) are stripped too.
static GIT_INDEX_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?m)^index [0-9a-f]+\.\.[0-9a-f]+(?: \d+)?\n?").unwrap()
});
/// Matches a `similarity index N%` line and, when it follows immediately, the
/// `rename from ...` / `rename to ...` pair (multi-line mode, anchored at line
/// start).
static GIT_SIMILARITY_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?m)^similarity index \d+%\n?(rename from .*\nrename to .*\n?)?").unwrap()
});
/// Matches whole lines that start with an indented `gpg:`, a
/// `-----BEGIN PGP` / `-----END PGP` armor marker, or `gpg: Signature`,
/// including the trailing newline.
static GIT_COMMIT_SIGNING_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?m)^( +gpg:|-----BEGIN PGP|-----END PGP|gpg: Signature).*\n?").unwrap()
});
/// Detects a line that begins with 40 lowercase hex digits followed by ` (`;
/// `compress_blame` uses it to decide whether there is anything to shorten.
static GIT_BLAME_HEADER_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?m)^[0-9a-f]{40} \(").unwrap());
/// Shortens line-per-entry `git log` output to at most `max_commits` entries.
///
/// `raw` is first passed through `compactor::normalise`. When the result has
/// no more than `max_commits` non-blank lines it is returned unchanged.
/// Otherwise the first `max_commits / 2` and the last
/// `max_commits - max_commits / 2` non-blank lines are kept, separated by a
/// marker line stating how many were omitted.
pub fn compress_log(raw: &str, max_commits: usize) -> String {
    let cleaned = compactor::normalise(raw);
    let entries: Vec<&str> = cleaned
        .lines()
        .filter(|entry| !entry.trim().is_empty())
        .collect();
    let total = entries.len();
    if total <= max_commits {
        return cleaned;
    }

    // Integer division: any odd entry goes to the tail.
    let head_len = max_commits / 2;
    let tail_len = max_commits - head_len;
    let (head, rest) = entries.split_at(head_len);
    let tail = &rest[rest.len() - tail_len..];
    let skipped = total - head_len - tail_len;

    let top = head.join("\n");
    let bottom = tail.join("\n");
    format!("{top}\n... [{skipped} commits omitted] ...\n{bottom}")
}
/// Reduces multi-line `git log` output to its `commit` headers and any other
/// non-noise lines.
///
/// After `compactor::normalise`, each line is handled as follows:
/// - blank lines and lines starting with `Author:`, `Date:`, `Merge:` or a
///   space are dropped (this includes any line that is indented);
/// - `commit <rest>` lines are rewritten to `commit ` plus the first eight
///   characters of `<rest>`;
/// - every other line is kept verbatim.
///
/// The surviving lines are joined with `\n`.
pub fn compress_log_verbose(raw: &str) -> String {
    const NOISE_PREFIXES: [&str; 4] = ["Author:", "Date:", "Merge:", " "];

    let cleaned = compactor::normalise(raw);
    let mut out: Vec<String> = Vec::new();
    for line in cleaned.lines() {
        if line.trim().is_empty() || NOISE_PREFIXES.iter().any(|p| line.starts_with(p)) {
            continue;
        }
        match line.strip_prefix("commit ") {
            Some(rest) => {
                // Cut at a character boundary: slicing at a fixed byte offset
                // would panic if the text after `commit ` is not plain ASCII.
                let short = rest
                    .char_indices()
                    .nth(8)
                    .map_or(rest, |(end, _)| &rest[..end]);
                out.push(format!("commit {short}"));
            }
            None => out.push(line.to_string()),
        }
    }
    out.join("\n")
}
/// Compresses `git log` output that carries per-file stat rows.
///
/// After `compactor::normalise`, each line is handled as follows:
/// - blank lines and lines starting with `Author:`, `Date:` or `Merge:` are
///   dropped;
/// - `commit <rest>` lines are rewritten to `commit ` plus the first eight
///   characters of `<rest>`;
/// - lines that start with a space and contain `|` are treated as stat rows
///   and dropped when `is_skip_path` rejects the text before the first `|`;
/// - every other line is kept verbatim.
///
/// The surviving lines are joined with `\n`.
pub fn compress_log_stat(raw: &str) -> String {
    let cleaned = compactor::normalise(raw);
    let mut out: Vec<String> = Vec::new();

    // Lines are processed in a single pass; no per-commit grouping is needed
    // because every rule below looks at one line only.
    for line in cleaned.lines() {
        if line.trim().is_empty()
            || line.starts_with("Author:")
            || line.starts_with("Date:")
            || line.starts_with("Merge:")
        {
            continue;
        }
        if let Some(rest) = line.strip_prefix("commit ") {
            // Cut at a character boundary: slicing at a fixed byte offset
            // would panic if the text after `commit ` is not plain ASCII.
            let short = rest
                .char_indices()
                .nth(8)
                .map_or(rest, |(end, _)| &rest[..end]);
            out.push(format!("commit {short}"));
            continue;
        }
        if line.starts_with(' ') && line.contains('|') {
            let path = line.split('|').next().unwrap_or("").trim();
            if is_skip_path(path) {
                continue;
            }
        }
        out.push(line.to_string());
    }
    out.join("\n")
}
/// Directory names (each with a trailing `/`) whose contents `is_skip_path`
/// filters out.
const SKIP_DIRS: &[&str] = &[
    "node_modules/",
    "target/",
    "dist/",
    "build/",
    ".git/",
    ".venv/",
    "venv/",
    "vendor/",
    ".next/",
    ".nuxt/",
    "out/",
    "coverage/",
    "__pycache__/",
];

/// Returns `true` when `path` should be left out of compressed output.
///
/// A path is skipped when it starts with `...`, or when any of its directory
/// components (everything before the final `/`) is one of the names listed in
/// `SKIP_DIRS`. A path without any `/` is never matched against `SKIP_DIRS`.
fn is_skip_path(path: &str) -> bool {
    if path.starts_with("...") {
        return true;
    }
    // Only components followed by a `/` can match an entry such as "target/".
    let Some((parent_dirs, _file_name)) = path.rsplit_once('/') else {
        return false;
    };
    parent_dirs.split('/').any(|component| {
        SKIP_DIRS
            .iter()
            .any(|dir| dir.strip_suffix('/') == Some(component))
    })
}
/// Extracts the first path from a `diff --git a/<path> b/<path>` header.
///
/// Returns `None` when `header` does not start with `diff --git a/`.
/// Otherwise returns the text between that prefix and the first ` b/`, or
/// everything after the prefix when no ` b/` is present.
fn diff_header_path(header: &str) -> Option<&str> {
    let after_prefix = header.strip_prefix("diff --git a/")?;
    let old_path = match after_prefix.split_once(" b/") {
        Some((before, _)) => before,
        None => after_prefix,
    };
    Some(old_path)
}
/// Splits a diff into per-file blocks and separates kept blocks from skipped
/// ones.
///
/// A block starts at each line beginning with `diff --git ` and runs until the
/// next such line or the end of input; every line of a block is stored with a
/// trailing `\n`. Lines before the first header belong to no block and are
/// discarded. A block is skipped when `diff_header_path` yields a path that
/// `is_skip_path` rejects.
///
/// Returns the kept blocks in input order and the number of skipped blocks.
fn partition_diff_blocks(diff: &str) -> (Vec<String>, usize) {
    let mut kept: Vec<String> = Vec::new();
    let mut skipped = 0usize;
    // `Some((text, skip))` while inside a block, `None` before the first header.
    let mut open: Option<(String, bool)> = None;

    for line in diff.lines() {
        if line.starts_with("diff --git ") {
            let skip = diff_header_path(line).is_some_and(is_skip_path);
            // Start the new block and file the finished one by moving it,
            // rather than cloning and clearing a shared buffer.
            match open.replace((String::new(), skip)) {
                Some((_, true)) => skipped += 1,
                Some((text, false)) => kept.push(text),
                None => {}
            }
        }
        if let Some((text, _)) = open.as_mut() {
            text.push_str(line);
            text.push('\n');
        }
    }

    // File the block that was still open at end of input.
    match open {
        Some((_, true)) => skipped += 1,
        Some((text, false)) => kept.push(text),
        None => {}
    }
    (kept, skipped)
}
/// Compresses unified-diff output.
///
/// After `compactor::normalise`, matches of `GIT_INDEX_RE` and
/// `GIT_SIMILARITY_RE` are removed and the text is split with
/// `partition_diff_blocks`. If that finds no blocks at all, the stripped text
/// is returned as-is. Otherwise the first eight kept blocks are emitted,
/// followed by a marker counting any further kept blocks and a marker
/// counting skipped blocks.
pub fn compress_diff(raw: &str) -> String {
    const MAX_FILES: usize = 8;

    let cleaned = compactor::normalise(raw);
    let without_index = GIT_INDEX_RE.replace_all(&cleaned, "");
    let stripped = GIT_SIMILARITY_RE.replace_all(&without_index, "");

    let (kept, skipped) = partition_diff_blocks(&stripped);
    if kept.is_empty() && skipped == 0 {
        // No `diff --git` headers were found.
        return stripped.into_owned();
    }

    let mut out: String = kept.iter().take(MAX_FILES).map(String::as_str).collect();
    if kept.len() > MAX_FILES {
        let hidden = kept.len() - MAX_FILES;
        out.push_str(&format!("... [{hidden} more files not shown] ...\n"));
    }
    if skipped > 0 {
        out.push_str(&format!(
            "... [{skipped} build/dependency files skipped] ...\n"
        ));
    }
    out
}
/// Compresses `--stat` style output.
///
/// After `compactor::normalise`, blank lines are ignored. If the last
/// remaining line contains `changed` it is treated as the summary and always
/// emitted last. Of the other rows, those whose path (text before the first
/// `|`) is rejected by `is_skip_path` are only counted; at most five of the
/// rest are shown, followed by markers for hidden and skipped rows.
/// Returns the normalised text unchanged when it has no non-blank lines.
pub fn compress_diff_stat(raw: &str) -> String {
    const MAX_SHOWN: usize = 5;

    let cleaned = compactor::normalise(raw);
    let rows: Vec<&str> = cleaned
        .lines()
        .filter(|row| !row.trim().is_empty())
        .collect();
    let Some((&last, before_last)) = rows.split_last() else {
        return cleaned;
    };

    let (file_rows, summary) = if last.contains("changed") {
        (before_last, Some(last))
    } else {
        (rows.as_slice(), None)
    };

    let (skipped, kept): (Vec<&str>, Vec<&str>) = file_rows
        .iter()
        .copied()
        .partition(|row| is_skip_path(row.split('|').next().unwrap_or("").trim()));

    let mut out: Vec<String> = kept
        .iter()
        .take(MAX_SHOWN)
        .map(|&row| row.to_string())
        .collect();
    if kept.len() > MAX_SHOWN {
        out.push(format!("... [{} more files] ...", kept.len() - MAX_SHOWN));
    }
    if !skipped.is_empty() {
        let skip_count = skipped.len();
        out.push(format!(
            "... [{skip_count} build/dependency files skipped] ..."
        ));
    }
    out.extend(summary.map(str::to_string));
    out.join("\n")
}
/// Compresses `git status` output.
///
/// After `compactor::normalise`, matches of `GIT_HINT_RE` are removed. Inside
/// the section opened by a line containing `Untracked files:` (and closed by
/// a `Changes to be committed:` or `Changes not staged` line), only the first
/// eight tab-indented entries are kept; the ninth is replaced by a single
/// "more untracked files" marker and later ones are dropped. All other lines
/// are kept. The result is passed through `compactor::collapse_blanks`.
pub fn compress_status(raw: &str) -> String {
    const MAX_UNTRACKED: usize = 8;

    let cleaned = compactor::normalise(raw);
    let without_hints = GIT_HINT_RE.replace_all(&cleaned, "");

    let mut kept: Vec<&str> = Vec::new();
    let mut seen_untracked = 0usize;
    let mut inside_untracked = false;

    for line in without_hints.lines() {
        // Section tracking; the closing check runs second, as it wins when a
        // line matches both.
        if line.contains("Untracked files:") {
            inside_untracked = true;
        }
        if line.contains("Changes to be committed:") || line.contains("Changes not staged") {
            inside_untracked = false;
        }

        let is_untracked_entry = inside_untracked && line.starts_with('\t');
        if !is_untracked_entry {
            kept.push(line);
            continue;
        }

        seen_untracked += 1;
        if seen_untracked <= MAX_UNTRACKED {
            kept.push(line);
        } else if seen_untracked == MAX_UNTRACKED + 1 {
            kept.push(" ... and more untracked files (run git status for full list)");
        }
    }
    compactor::collapse_blanks(&kept.join("\n"))
}
/// Compresses `git commit` output.
///
/// After `compactor::normalise`, matches of `GIT_COMMIT_SIGNING_RE` are
/// removed. Lines whose trimmed text starts with `running ` or contains
/// `pre-commit` or `post-commit` are dropped, and the rest is passed through
/// `compactor::collapse_blanks`.
pub fn compress_commit(raw: &str) -> String {
    let is_hook_noise = |line: &str| {
        let text = line.trim();
        text.starts_with("running ") || text.contains("pre-commit") || text.contains("post-commit")
    };

    let cleaned = compactor::normalise(raw);
    let unsigned = GIT_COMMIT_SIGNING_RE.replace_all(&cleaned, "");
    let mut kept: Vec<&str> = Vec::new();
    for line in unsigned.lines() {
        if !is_hook_noise(line) {
            kept.push(line);
        }
    }
    compactor::collapse_blanks(&kept.join("\n"))
}
/// Compresses `git fetch` / `git pull` output.
///
/// After `compactor::normalise`, matches of `GIT_OBJECT_COUNT_RE` and
/// `GIT_DELTA_RE` are removed, lines that are just `remote:` (ignoring
/// surrounding whitespace) are dropped, and the rest is passed through
/// `compactor::collapse_blanks`.
pub fn compress_fetch(raw: &str) -> String {
    let cleaned = compactor::normalise(raw);
    let without_counts = GIT_OBJECT_COUNT_RE.replace_all(&cleaned, "");
    let without_delta = GIT_DELTA_RE.replace_all(&without_counts, "");

    let mut kept: Vec<&str> = Vec::new();
    for line in without_delta.lines() {
        if line.trim() == "remote:" {
            continue;
        }
        kept.push(line);
    }
    compactor::collapse_blanks(&kept.join("\n"))
}
/// Shortens full-length commit hashes at the start of `git blame` lines.
///
/// After `compactor::normalise`, the text is returned unchanged unless
/// `GIT_BLAME_HEADER_RE` matches somewhere in it. Otherwise every line that
/// begins with 40 lowercase hex digits has that prefix cut down to its first
/// eight digits; other lines are kept verbatim. Lines are joined with `\n`.
pub fn compress_blame(raw: &str) -> String {
    const FULL_SHA_LEN: usize = 40;
    const SHORT_SHA_LEN: usize = 8;

    let cleaned = compactor::normalise(raw);
    if !GIT_BLAME_HEADER_RE.is_match(&cleaned) {
        return cleaned;
    }

    // A plain byte check replaces a regex that used to be compiled on every
    // call; it accepts exactly the same lines (`^[0-9a-f]{40}`).
    let starts_with_full_sha = |line: &str| {
        line.len() >= FULL_SHA_LEN
            && line.as_bytes()[..FULL_SHA_LEN]
                .iter()
                .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
    };

    let lines: Vec<String> = cleaned
        .lines()
        .map(|line| {
            if starts_with_full_sha(line) {
                // The first 40 bytes are ASCII, so both slice points are
                // valid character boundaries.
                format!("{}{}", &line[..SHORT_SHA_LEN], &line[FULL_SHA_LEN..])
            } else {
                line.to_string()
            }
        })
        .collect();
    lines.join("\n")
}
/// Limits `git stash` output to its first 20 non-blank lines.
///
/// After `compactor::normalise`, blank lines are dropped. When more than 20
/// lines remain, the first 20 are followed by a marker with the number of
/// entries left out. Lines are joined with `\n`.
pub fn compress_stash_list(raw: &str) -> String {
    const MAX_ENTRIES: usize = 20;

    let cleaned = compactor::normalise(raw);
    let mut entries = cleaned.lines().filter(|entry| !entry.trim().is_empty());

    // Take the visible part, then count whatever is left in the iterator.
    let shown: Vec<&str> = entries.by_ref().take(MAX_ENTRIES).collect();
    let hidden = entries.count();

    let mut out = shown.join("\n");
    if hidden > 0 {
        out.push_str(&format!("\n... [{hidden} more stash entries]"));
    }
    out
}
/// Matches a space followed by a bracketed tracking note such as `[ahead 2]`,
/// `[behind 1]` or `[ahead 2, behind 1]`. Both parts are optional in the
/// pattern, so an empty ` []` matches as well.
static GIT_BRANCH_TRACKING_RE: Lazy<Regex> =
Lazy::new(|| Regex::new(r" \[(?:ahead \d+(?:, )?)?(?:behind \d+)?\]").unwrap());
/// Compresses `git branch` output.
///
/// After `compactor::normalise`, matches of `GIT_BRANCH_TRACKING_RE` are
/// removed and blank lines dropped. When more than 40 lines remain, the first
/// 40 are followed by a marker with the number of branches left out. Lines
/// are joined with `\n`.
pub fn compress_branch(raw: &str) -> String {
    const MAX_BRANCHES: usize = 40;

    let cleaned = compactor::normalise(raw);
    let without_tracking = GIT_BRANCH_TRACKING_RE.replace_all(&cleaned, "");
    let branches: Vec<&str> = without_tracking
        .lines()
        .filter(|branch| !branch.trim().is_empty())
        .collect();

    if branches.len() <= MAX_BRANCHES {
        return branches.join("\n");
    }
    let (shown, hidden) = branches.split_at(MAX_BRANCHES);
    format!(
        "{}\n... [{} more branches]",
        shown.join("\n"),
        hidden.len()
    )
}
/// Limits `git tag` output to its first 30 non-blank lines.
///
/// After `compactor::normalise`, blank lines are dropped. When more than 30
/// lines remain, the first 30 are followed by a marker with the number of
/// tags left out. Lines are joined with `\n`.
pub fn compress_tag(raw: &str) -> String {
    const MAX_TAGS: usize = 30;

    let cleaned = compactor::normalise(raw);
    let tags: Vec<&str> = cleaned
        .lines()
        .filter(|tag| !tag.trim().is_empty())
        .collect();

    match tags.get(MAX_TAGS..) {
        // More than MAX_TAGS entries: show the head and count the remainder.
        Some(hidden) if !hidden.is_empty() => {
            let shown = tags[..MAX_TAGS].join("\n");
            let extra = hidden.len();
            format!("{shown}\n... [{extra} more tags]")
        }
        _ => tags.join("\n"),
    }
}
/// Compresses `git remote -v` style output by removing duplicate push lines.
///
/// After `compactor::normalise`, a line ending in `(push)` is dropped only
/// when a line ending in `(fetch)` carries the same text before the suffix
/// (same remote name and URL). A push line with a different URL, or with no
/// matching fetch line, is kept so that information is not lost. All other
/// lines are kept; lines are joined with `\n`.
pub fn compress_remote(raw: &str) -> String {
    let cleaned = compactor::normalise(raw);

    // The text before the "(fetch)" suffix of every fetch line.
    let fetch_entries: Vec<&str> = cleaned
        .lines()
        .filter_map(|line| line.strip_suffix("(fetch)"))
        .map(str::trim_end)
        .collect();

    let lines: Vec<&str> = cleaned
        .lines()
        .filter(|line| match line.strip_suffix("(push)") {
            Some(entry) => !fetch_entries.contains(&entry.trim_end()),
            None => true,
        })
        .collect();
    lines.join("\n")
}
pub fn compress_rebase(raw: &str) -> String {
let cleaned = compactor::normalise(raw);
let rebase_progress_re = Regex::new(r"(?m)^Rebasing \(\d+/\d+\)\n?").unwrap();
let s = rebase_progress_re.replace_all(&cleaned, "");
compactor::collapse_blanks(&s)
}
/// Compresses `git cherry-pick` output.
///
/// After `compactor::normalise`, the full text is returned when it contains
/// `CONFLICT` or `conflict`. Otherwise only its first line is returned (an
/// empty string when there are no lines).
pub fn compress_cherry_pick(raw: &str) -> String {
    let cleaned = compactor::normalise(raw);
    let mentions_conflict = ["CONFLICT", "conflict"]
        .iter()
        .any(|marker| cleaned.contains(*marker));
    if mentions_conflict {
        return cleaned;
    }
    match cleaned.lines().next() {
        Some(first_line) => first_line.to_string(),
        None => String::new(),
    }
}
/// Compresses `git show` output.
///
/// After `compactor::normalise`, matches of `GIT_INDEX_RE` and then
/// `GIT_SIMILARITY_RE` are removed, and the result is passed through
/// `compactor::collapse_blanks`.
pub fn compress_show(raw: &str) -> String {
    let cleaned = compactor::normalise(raw);
    let without_index = GIT_INDEX_RE.replace_all(&cleaned, "");
    let without_renames = GIT_SIMILARITY_RE.replace_all(&without_index, "");
    compactor::collapse_blanks(&without_renames)
}
/// Compresses `git worktree` output.
///
/// No worktree-specific filtering is applied: the result is exactly what
/// `compactor::normalise` returns for `raw`.
pub fn compress_worktree(raw: &str) -> String {
compactor::normalise(raw)
}
/// Compresses `git gc` output.
///
/// After `compactor::normalise`, matches of `GIT_OBJECT_COUNT_RE` are removed
/// and the rest is passed through `compactor::collapse_blanks`.
pub fn compress_gc(raw: &str) -> String {
    let cleaned = compactor::normalise(raw);
    let without_progress = GIT_OBJECT_COUNT_RE.replace_all(&cleaned, "");
    compactor::collapse_blanks(&without_progress)
}
/// Compresses `git submodule` output.
///
/// After `compactor::normalise`, a line is dropped when its trimmed text is
/// empty, is a `Cloning into` line longer than 60 bytes, or starts with one
/// of the transfer-progress prefixes listed below. Remaining lines are kept
/// untrimmed and joined with `\n`.
pub fn compress_submodule(raw: &str) -> String {
    const PROGRESS_PREFIXES: [&str; 4] = [
        "remote: Counting",
        "remote: Compressing",
        "Receiving objects:",
        "Resolving deltas:",
    ];

    let is_noise = |line: &str| {
        let text = line.trim();
        if text.is_empty() {
            return true;
        }
        if text.starts_with("Cloning into") && text.len() > 60 {
            return true;
        }
        PROGRESS_PREFIXES
            .iter()
            .any(|prefix| text.starts_with(*prefix))
    };

    let cleaned = compactor::normalise(raw);
    let mut kept: Vec<&str> = Vec::new();
    for line in cleaned.lines() {
        if !is_noise(line) {
            kept.push(line);
        }
    }
    kept.join("\n")
}
/// Compresses `git bisect` output down to its key lines.
///
/// After `compactor::normalise`, only lines that start with `Bisecting:`,
/// `commit `, `[`, `error` or `fatal`, or that contain `first bad commit`,
/// are kept and joined with `\n`. If no line qualifies, the normalised text
/// is returned unchanged.
pub fn compress_bisect(raw: &str) -> String {
    const KEY_PREFIXES: [&str; 5] = ["Bisecting:", "commit ", "[", "error", "fatal"];

    let cleaned = compactor::normalise(raw);
    let mut key_lines: Vec<&str> = Vec::new();
    for line in cleaned.lines() {
        let has_key_prefix = KEY_PREFIXES.iter().any(|prefix| line.starts_with(*prefix));
        if has_key_prefix || line.contains("first bad commit") {
            key_lines.push(line);
        }
    }

    if key_lines.is_empty() {
        return cleaned;
    }
    key_lines.join("\n")
}
pub fn compress_git(subcmd: &str, raw: &str, exit_code: i32) -> String {
if exit_code != 0 {
return compactor::normalise(raw);
}
let sub = subcmd.trim();
if sub.starts_with("log") {
let is_verbose = raw
.lines()
.any(|l| l.starts_with("commit ") && l.len() == 47);
if is_verbose {
if sub.contains("--stat")
|| sub.contains("--name-only")
|| sub.contains("--name-status")
{
return compress_log_stat(raw);
}
return compress_log_verbose(raw);
}
return compress_log(raw, 12);
}
if sub.starts_with("diff") {
if sub.contains("--stat") {
return compress_diff_stat(raw);
}
return compress_diff(raw);
}
if sub.starts_with("status") {
return compress_status(raw);
}
if sub.starts_with("commit") {
return compress_commit(raw);
}
if sub.starts_with("fetch") || sub.starts_with("pull") {
return compress_fetch(raw);
}
if sub.starts_with("blame") {
return compress_blame(raw);
}
if sub.starts_with("stash") {
return compress_stash_list(raw);
}
if sub.starts_with("branch") {
return compress_branch(raw);
}
if sub.starts_with("tag") {
return compress_tag(raw);
}
if sub.starts_with("remote") {
return compress_remote(raw);
}
if sub.starts_with("rebase") {
return compress_rebase(raw);
}
if sub.starts_with("cherry-pick") {
return compress_cherry_pick(raw);
}
if sub.starts_with("show") {
return compress_show(raw);
}
if sub.starts_with("worktree") {
return compress_worktree(raw);
}
if sub.starts_with("bisect") {
return compress_bisect(raw);
}
if sub.starts_with("gc") {
return compress_gc(raw);
}
if sub.starts_with("submodule") {
return compress_submodule(raw);
}
let cleaned = compactor::normalise(raw);
let s = GIT_OBJECT_COUNT_RE.replace_all(&cleaned, "");
compactor::collapse_blanks(&s)
}
// Unit tests for the git output compressors. Each test feeds a small,
// hand-written sample of git output to one compressor and checks which
// fragments survive in the result.
#[cfg(test)]
mod tests {
use super::*;
// 20 one-line entries with a limit of 6 must produce an "omitted" marker
// and fewer than 20 output lines.
#[test]
fn log_compression_keeps_head_tail() {
let log = (0..20)
.map(|i| format!("abc{i:04} commit message {i}"))
.collect::<Vec<_>>()
.join("\n");
let out = compress_log(&log, 6);
assert!(out.contains("omitted"), "should summarise middle: {out}");
assert!(out.lines().count() < 20);
}
// 20 stat rows plus a summary: rows are truncated, the summary is kept.
#[test]
fn diff_stat_truncates_long_lists() {
let mut s = String::new();
for i in 0..20 {
s.push_str(&format!(" file{i}.rs | 10 ++--\n"));
}
s.push_str(" 20 files changed, 100 insertions(+), 50 deletions(-)\n");
let out = compress_diff_stat(&s);
assert!(out.contains("more files"));
assert!(out.contains("20 files changed"));
}
// The `(use "git ...` hint must not appear, while the modified file does.
#[test]
fn status_strips_hints() {
let raw =
"On branch main\nChanges not staged:\n (use \"git add\" ...)\n\tmodified: foo.rs\n";
let out = compress_status(raw);
assert!(!out.contains("use \"git"), "should strip hints: {out}");
assert!(out.contains("foo.rs"));
}
// An indented `gpg:` line is removed; the commit subject stays.
#[test]
fn commit_strips_signing() {
let raw = "[main abc1234] feat: add thing\n gpg: Signature made Mon\n 1 file changed\n";
let out = compress_commit(raw);
assert!(!out.contains("gpg:"));
assert!(out.contains("feat: add thing"));
}
// `remote: Counting objects` progress is removed; the ref update stays.
#[test]
fn fetch_strips_object_count() {
let raw = "remote: Counting objects: 10, done.\nFrom github.com:org/repo\n abc..def main -> origin/main\n";
let out = compress_fetch(raw);
assert!(!out.contains("Counting objects"));
assert!(out.contains("main -> origin/main"));
}
// The `index <hex>..<hex> <mode>` header is removed; added lines stay.
#[test]
fn diff_strips_index_lines() {
let raw = "diff --git a/foo.rs b/foo.rs\nindex abc123..def456 100644\n--- a/foo.rs\n+++ b/foo.rs\n@@ -1,3 +1,4 @@\n+new line\n";
let out = compress_diff(raw);
assert!(
!out.contains("index abc123"),
"index line should be stripped: {out}"
);
assert!(out.contains("+new line"));
}
// 12 file blocks exceed the per-diff limit, so a marker must be present.
#[test]
fn diff_truncates_many_files() {
let mut s = String::new();
for i in 0..12 {
s.push_str(&format!("diff --git a/f{i}.rs b/f{i}.rs\n--- a/f{i}.rs\n+++ b/f{i}.rs\n@@ -1 +1 @@\n-old\n+new\n"));
}
let out = compress_diff(&s);
assert!(out.contains("more files not shown"));
}
// 30 stash entries exceed the limit of 20.
#[test]
fn stash_list_truncates_at_20() {
let raw = (0..30)
.map(|i| format!("stash@{{{i}}}: On main: WIP change {i}"))
.collect::<Vec<_>>()
.join("\n");
let out = compress_stash_list(&raw);
assert!(out.contains("more stash entries"), "{out}");
}
// 50 tags exceed the limit of 30.
#[test]
fn tag_list_truncates_at_30() {
let raw = (0..50)
.map(|i| format!("v1.{i}.0"))
.collect::<Vec<_>>()
.join("\n");
let out = compress_tag(&raw);
assert!(out.contains("more tags"), "{out}");
}
// A push line that repeats the fetch URL is dropped.
#[test]
fn remote_deduplicates_push_lines() {
let raw = "origin\thttps://github.com/org/repo.git (fetch)\norigin\thttps://github.com/org/repo.git (push)\n";
let out = compress_remote(raw);
assert!(!out.contains("(push)"), "{out}");
assert!(out.contains("(fetch)") || out.contains("origin"), "{out}");
}
// `Rebasing (n/m)` progress lines are removed; the final status stays.
#[test]
fn rebase_strips_progress() {
let raw = "Rebasing (1/10)\nRebasing (2/10)\nRebasing (10/10)\nSuccessfully rebased and updated refs/heads/main.\n";
let out = compress_rebase(raw);
assert!(!out.contains("Rebasing (1/10)"), "{out}");
assert!(out.contains("Successfully rebased"), "{out}");
}
// Without a conflict only the first line is returned.
#[test]
fn cherry_pick_clean_keeps_first_line_only() {
let raw = "[main abc1234] feat: add thing\n Date: Mon Jan 1 12:00:00 2024\n 1 file changed, 5 insertions(+)\n";
let out = compress_cherry_pick(raw);
assert!(out.contains("feat: add thing"), "{out}");
assert!(!out.contains("Date:"), "{out}");
}
// A block under node_modules/ is skipped and reported in the skip marker.
#[test]
fn diff_filters_node_modules() {
let mut s = String::new();
s.push_str("diff --git a/src/main.rs b/src/main.rs\n--- a/src/main.rs\n+++ b/src/main.rs\n@@ -1 +1 @@\n-old\n+new\n");
s.push_str("diff --git a/node_modules/foo/index.js b/node_modules/foo/index.js\n--- a/node_modules/foo/index.js\n+++ b/node_modules/foo/index.js\n@@ -1 +1 @@\n-old\n+new\n");
let out = compress_diff(&s);
assert!(
out.contains("src/main.rs"),
"should keep source file: {out}"
);
assert!(
!out.contains("node_modules/foo"),
"should skip node_modules: {out}"
);
assert!(
out.contains("build/dependency files skipped"),
"should mention skipped: {out}"
);
}
// A block under target/ is skipped and reported in the skip marker.
#[test]
fn diff_filters_target_dir() {
let mut s = String::new();
s.push_str("diff --git a/src/lib.rs b/src/lib.rs\n--- a/src/lib.rs\n+++ b/src/lib.rs\n@@ -1 +1 @@\n-x\n+y\n");
s.push_str("diff --git a/target/debug/build/foo.rs b/target/debug/build/foo.rs\n--- a/target/debug/build/foo.rs\n+++ b/target/debug/build/foo.rs\n@@ -1 +1 @@\n-a\n+b\n");
let out = compress_diff(&s);
assert!(out.contains("src/lib.rs"), "{out}");
assert!(!out.contains("target/debug"), "{out}");
assert!(out.contains("build/dependency files skipped"), "{out}");
}
// Stat rows under node_modules/ are skipped; the summary line is kept.
#[test]
fn diff_stat_filters_node_modules() {
let s = " node_modules/lodash/index.js | 5 +++++\n src/main.rs | 3 ---\n 2 files changed, 5 insertions(+), 3 deletions(-)\n";
let out = compress_diff_stat(&s);
assert!(
!out.contains("node_modules/lodash"),
"should skip node_modules: {out}"
);
assert!(
out.contains("src/main.rs"),
"should keep source file: {out}"
);
assert!(
out.contains("build/dependency files skipped"),
"should mention skipped: {out}"
);
assert!(
out.contains("2 files changed"),
"should keep summary line: {out}"
);
}
// With only source paths, no skip marker may appear.
#[test]
fn diff_no_skip_dirs_unchanged() {
let mut s = String::new();
for i in 0..4 {
s.push_str(&format!("diff --git a/src/f{i}.rs b/src/f{i}.rs\n--- a/src/f{i}.rs\n+++ b/src/f{i}.rs\n@@ -1 +1 @@\n-old\n+new\n"));
}
let out = compress_diff(&s);
assert!(!out.contains("skipped"), "no skip dirs: {out}");
assert!(out.contains("src/f0.rs"), "{out}");
}
// In log --stat output, node_modules rows and the Author line are removed.
#[test]
fn log_stat_filters_node_modules() {
let raw = "commit a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2\nAuthor: dev <dev@test.com>\nDate: Mon Jan 1 2024\n\n add files\n\n src/auth/service.ts | 10 ++++++++++\n node_modules/foo/index.js | 5 +++++\n node_modules/bar/bar.js | 3 +++\n 3 files changed, 18 insertions(+)\n";
let out = compress_log_stat(raw);
assert!(
out.contains("src/auth/service.ts"),
"should keep source: {out}"
);
assert!(
!out.contains("node_modules/foo"),
"should skip node_modules: {out}"
);
assert!(!out.contains("Author:"), "should strip Author: {out}");
}
}