Skip to main content

chronicle/annotate/
filter.rs

1use crate::annotate::gather::AnnotationContext;
2
/// Decision from pre-LLM filtering.
///
/// The `String` carried by [`FilterDecision::Skip`] and
/// [`FilterDecision::Trivial`] is a short human-readable reason for the
/// decision (for example `"merge commit"`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterDecision {
    /// Proceed with full LLM annotation.
    Annotate,
    /// Skip annotation entirely (lockfile-only, merge commits, etc.).
    /// The payload states why the commit was skipped.
    Skip(String),
    /// Produce a minimal local annotation without calling the LLM.
    /// The payload describes why the change was considered trivial.
    Trivial(String),
}
13
/// Dependency lockfile names; changes confined to these files carry no
/// meaningful code changes.
const LOCKFILE_PATTERNS: &[&str] = &[
    "Cargo.lock",        // Cargo (Rust)
    "package-lock.json", // npm
    "yarn.lock",         // Yarn
    "pnpm-lock.yaml",    // pnpm
    "Gemfile.lock",      // Bundler (Ruby)
    "poetry.lock",       // Poetry (Python)
];
23
/// File extensions (lowercase, without the leading dot) that indicate binary,
/// non-code content.
const BINARY_EXTENSIONS: &[&str] = &[
    // Images and icons
    "png", "jpg", "jpeg", "gif", "bmp", "ico", "svg",
    // Fonts
    "woff", "woff2", "ttf", "eot",
    // Documents and archives
    "pdf", "zip", "tar", "gz", "bz2",
    // Executables and shared libraries
    "exe", "dll", "so", "dylib",
    // Compiled intermediates
    "pyc", "class", "o", "obj",
];
29
/// Lowercase path fragments that mark generated or vendored files, which
/// aren't worth annotating.
const GENERATED_PATTERNS: &[&str] = &[
    // Minified assets
    ".min.js",
    ".min.css",
    // Third-party code checked into the tree
    "vendor/",
    "vendored/",
    "node_modules/",
    // Generator markers embedded in file names
    ".generated.",
    "_generated.",
    // Build output directories
    "dist/",
    "build/",
];
42
43/// Check if a file path refers to a binary file based on extension.
44fn is_binary_path(path: &str) -> bool {
45    if let Some(ext) = path.rsplit('.').next() {
46        BINARY_EXTENSIONS.contains(&ext.to_lowercase().as_str())
47    } else {
48        false
49    }
50}
51
52/// Check if a file path refers to a generated or vendored file.
53fn is_generated_path(path: &str) -> bool {
54    let lower = path.to_lowercase();
55    GENERATED_PATTERNS
56        .iter()
57        .any(|pattern| lower.contains(pattern))
58}
59
/// Inclusive upper bound on the total number of changed lines for a commit to
/// be marked as trivial.
const TRIVIAL_THRESHOLD: usize = 3;
62
63/// Check if this commit should be annotated, skipped, or trivially handled.
64pub fn pre_llm_filter(context: &AnnotationContext) -> FilterDecision {
65    // Skip: commit message matches skip patterns
66    let msg = context.commit_message.trim();
67    if msg.starts_with("Merge branch") {
68        return FilterDecision::Skip("merge commit".to_string());
69    }
70    if msg.starts_with("WIP") {
71        return FilterDecision::Skip("work-in-progress commit".to_string());
72    }
73    if msg.starts_with("fixup!") {
74        return FilterDecision::Skip("fixup commit".to_string());
75    }
76    if msg.starts_with("squash!") {
77        return FilterDecision::Skip("squash commit".to_string());
78    }
79
80    // Skip: all files are lockfiles
81    if !context.diffs.is_empty()
82        && context.diffs.iter().all(|d| {
83            LOCKFILE_PATTERNS
84                .iter()
85                .any(|pattern| d.path.ends_with(pattern))
86        })
87    {
88        return FilterDecision::Skip("lockfile-only changes".to_string());
89    }
90
91    // Skip: all files are binary
92    if !context.diffs.is_empty() && context.diffs.iter().all(|d| is_binary_path(&d.path)) {
93        return FilterDecision::Skip("binary-only changes".to_string());
94    }
95
96    // Skip: all files are generated/vendored
97    if !context.diffs.is_empty() && context.diffs.iter().all(|d| is_generated_path(&d.path)) {
98        return FilterDecision::Skip("generated/vendored changes".to_string());
99    }
100
101    // Trivial: total changed lines <= threshold
102    let total_changed: usize = context.diffs.iter().map(|d| d.changed_line_count()).sum();
103
104    if total_changed <= TRIVIAL_THRESHOLD {
105        return FilterDecision::Trivial(format!(
106            "trivial change ({} lines changed)",
107            total_changed
108        ));
109    }
110
111    FilterDecision::Annotate
112}
113
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::{DiffStatus, FileDiff, Hunk, HunkLine};

    /// Build an `AnnotationContext` with fixed metadata and the given commit
    /// message and diffs.
    fn make_context(message: &str, diffs: Vec<FileDiff>) -> AnnotationContext {
        AnnotationContext {
            commit_sha: "abc123".to_string(),
            commit_message: message.to_string(),
            author_name: "Test".to_string(),
            author_email: "test@test.com".to_string(),
            timestamp: "2024-01-01T00:00:00Z".to_string(),
            diffs,
            author_context: None,
        }
    }

    /// Build a single-hunk `Modified` diff for `path` containing `added`
    /// added lines followed by `removed` removed lines.
    fn make_diff(path: &str, added: usize, removed: usize) -> FileDiff {
        let mut lines = Vec::new();
        for _ in 0..added {
            lines.push(HunkLine::Added("+ line".to_string()));
        }
        for _ in 0..removed {
            lines.push(HunkLine::Removed("- line".to_string()));
        }
        FileDiff {
            path: path.to_string(),
            old_path: None,
            status: DiffStatus::Modified,
            hunks: vec![Hunk {
                old_start: 1,
                old_count: removed as u32,
                new_start: 1,
                new_count: added as u32,
                header: String::new(),
                lines,
            }],
        }
    }

    #[test]
    fn test_skip_merge() {
        let ctx = make_context("Merge branch 'feature' into main", vec![]);
        assert!(matches!(pre_llm_filter(&ctx), FilterDecision::Skip(_)));
    }

    #[test]
    fn test_skip_wip() {
        let ctx = make_context("WIP stuff", vec![]);
        assert!(matches!(pre_llm_filter(&ctx), FilterDecision::Skip(_)));
    }

    #[test]
    fn test_skip_fixup() {
        let ctx = make_context("fixup! Fix typo", vec![make_diff("src/main.rs", 20, 5)]);
        assert_eq!(
            pre_llm_filter(&ctx),
            FilterDecision::Skip("fixup commit".to_string())
        );
    }

    #[test]
    fn test_skip_squash() {
        let ctx = make_context("squash! Fix typo", vec![make_diff("src/main.rs", 20, 5)]);
        assert_eq!(
            pre_llm_filter(&ctx),
            FilterDecision::Skip("squash commit".to_string())
        );
    }

    #[test]
    fn test_message_is_trimmed_before_prefix_match() {
        // Leading whitespace must not hide a skip prefix.
        let ctx = make_context("  \nMerge branch 'feature' into main\n", vec![]);
        assert_eq!(
            pre_llm_filter(&ctx),
            FilterDecision::Skip("merge commit".to_string())
        );
    }

    #[test]
    fn test_skip_lockfile_only() {
        let ctx = make_context("Update deps", vec![make_diff("Cargo.lock", 10, 5)]);
        assert!(matches!(pre_llm_filter(&ctx), FilterDecision::Skip(_)));
    }

    #[test]
    fn test_mixed_lockfile_and_code() {
        // A lockfile alongside real code must not suppress annotation.
        let ctx = make_context(
            "Add dependency and use it",
            vec![
                make_diff("Cargo.lock", 10, 5),
                make_diff("src/main.rs", 20, 5),
            ],
        );
        assert_eq!(pre_llm_filter(&ctx), FilterDecision::Annotate);
    }

    #[test]
    fn test_trivial() {
        let ctx = make_context("Fix typo", vec![make_diff("src/main.rs", 1, 1)]);
        assert!(matches!(pre_llm_filter(&ctx), FilterDecision::Trivial(_)));
    }

    #[test]
    fn test_empty_diffs_are_trivial() {
        // With no diffs the "all files are ..." rules are bypassed and the
        // changed-line total is zero.
        let ctx = make_context("Empty commit", vec![]);
        assert_eq!(
            pre_llm_filter(&ctx),
            FilterDecision::Trivial("trivial change (0 lines changed)".to_string())
        );
    }

    #[test]
    fn test_annotate() {
        let ctx = make_context("Add new feature", vec![make_diff("src/main.rs", 20, 5)]);
        assert_eq!(pre_llm_filter(&ctx), FilterDecision::Annotate);
    }

    #[test]
    fn test_skip_binary_only() {
        let ctx = make_context("Add logo", vec![make_diff("assets/logo.png", 10, 0)]);
        assert!(
            matches!(pre_llm_filter(&ctx), FilterDecision::Skip(ref s) if s.contains("binary"))
        );
    }

    #[test]
    fn test_skip_generated_only() {
        let ctx = make_context(
            "Update vendored deps",
            vec![make_diff("vendor/lib.js", 100, 50)],
        );
        assert!(
            matches!(pre_llm_filter(&ctx), FilterDecision::Skip(ref s) if s.contains("generated"))
        );
    }

    #[test]
    fn test_mixed_binary_and_code() {
        let ctx = make_context(
            "Add feature with icon",
            vec![
                make_diff("src/main.rs", 20, 5),
                make_diff("assets/icon.png", 10, 0),
            ],
        );
        assert_eq!(pre_llm_filter(&ctx), FilterDecision::Annotate);
    }

    #[test]
    fn test_skip_min_js_only() {
        let ctx = make_context(
            "Rebuild minified assets",
            vec![make_diff("dist/app.min.js", 500, 400)],
        );
        assert!(
            matches!(pre_llm_filter(&ctx), FilterDecision::Skip(ref s) if s.contains("generated"))
        );
    }

    #[test]
    fn test_is_binary_path() {
        assert!(is_binary_path("logo.png"));
        assert!(is_binary_path("path/to/image.JPG"));
        assert!(is_binary_path("lib.so"));
        assert!(!is_binary_path("src/main.rs"));
        assert!(!is_binary_path("README.md"));
    }

    #[test]
    fn test_is_generated_path() {
        assert!(is_generated_path("vendor/lib.js"));
        assert!(is_generated_path("dist/bundle.js"));
        assert!(is_generated_path("app.min.js"));
        assert!(is_generated_path("node_modules/foo/index.js"));
        assert!(!is_generated_path("src/main.rs"));
    }
}
243}