Skip to main content

codelens_engine/
redundant_definitions.rs

1//! Detects "thin wrapper" definitions — functions whose entire body is a
2//! single call to another function with one literal default argument
3//! (e.g. `pub fn record_X(&self) { self.record_X_for_session(None) }`).
4//!
5//! This is a syntactic-only detector that catches the exact pattern flagged
6//! by self-dogfooding in v1.13.0 Phase 1-A: 16 `record_*` wrappers all
7//! forwarding to their `_for_session(None)` substrate. The substrate could
8//! be flagged by `find_dead_code_v2` only AFTER the wrappers were removed,
9//! so this complement helps surface the cluster pre-deletion.
10
11use crate::project::{collect_files, ProjectRoot};
12use anyhow::Result;
13use regex::Regex;
14use serde::Serialize;
15use std::path::Path;
16use std::sync::LazyLock;
17
18/// Matches a Rust one-line wrapper:
19///   `pub fn NAME(args) [-> RetType] { self.OTHER(args, LITERAL) [;] }`
20///   `fn NAME(args) [-> RetType] { OTHER(args, LITERAL) [;] }`
21/// where LITERAL is one of: `None`, `Default::default()`, `false`, `true`,
22/// a bare integer literal, or a quoted string literal.
23static RUST_ONE_LINE_WRAPPER_RE: LazyLock<Regex> = LazyLock::new(|| {
24    // (?m): multiline (^/$ are line anchors).
25    // Inner args use [^){};|] to reject:
26    //   - `;` (statement boundary — would mean multi-statement body)
27    //   - `{` `}` (block — closure body, struct literal)
28    //   - `|` (closure pipe — `|x| ...` indicates non-trivial logic, not a literal default)
29    // Default group is REQUIRED (no trailing `?`): a wrapper without a literal
30    // default is just an alias / passthrough, not the cleanup-target shape this
31    // detector exists for. Codex review on PR #148 (P1).
32    Regex::new(
33        r#"(?m)^\s*(?:pub(?:\([^)]*\))?\s+)?fn\s+(?P<wrapper>[A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?:->\s*[^{]+?)?\s*\{\s*(?:self\.|Self::)?(?P<target>[A-Za-z_][A-Za-z0-9_]*)\s*\([^){};|]*?(?P<default>None|Default::default\(\)|true|false|-?\d+|"[^"]*")\s*\)\s*;?\s*\}\s*$"#
34    ).unwrap()
35});
36
37#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
38pub struct RedundantDefinitionEntry {
39    pub file: String,
40    pub wrapper: String,
41    pub target: String,
42    pub line: usize,
43    pub default_arg: Option<String>,
44    pub kind: &'static str,
45}
46
47/// Finds Rust one-line wrappers in the project.
48///
49/// Returns a list of (wrapper, target) pairs. Callers can group by `target`
50/// to find substrates with multiple wrappers — the highest-leverage cleanup
51/// opportunity per Phase 1-A's findings.
52pub fn find_redundant_definitions(
53    project: &ProjectRoot,
54    max_results: usize,
55) -> Result<Vec<RedundantDefinitionEntry>> {
56    let mut results: Vec<RedundantDefinitionEntry> = Vec::new();
57    let candidates = collect_files(project.as_path(), is_rust_file)?;
58
59    for path in &candidates {
60        let source = match std::fs::read_to_string(path) {
61            Ok(s) => s,
62            Err(_) => continue,
63        };
64        let relative = project.to_relative(path);
65        if is_test_file(&relative) {
66            continue;
67        }
68        // Codex P1 (PR #149): scan the original source so reported line
69        // numbers match the file the user actually opens. Skip matches that
70        // fall inside `#[cfg(test)] mod ...` ranges instead of stripping
71        // them — stripping rewrites line offsets and made every location
72        // shift after the first cfg(test) block.
73        let cfg_test_ranges = collect_cfg_test_ranges(&source);
74        scan_rust_source(&source, &relative, &cfg_test_ranges, &mut results);
75        if max_results > 0 && results.len() >= max_results {
76            break;
77        }
78    }
79
80    results.sort_by(|a, b| {
81        a.target
82            .cmp(&b.target)
83            .then(a.file.cmp(&b.file))
84            .then(a.line.cmp(&b.line))
85    });
86    if max_results > 0 && results.len() > max_results {
87        results.truncate(max_results);
88    }
89    Ok(results)
90}
91
92fn scan_rust_source(
93    source: &str,
94    file: &str,
95    skip_ranges: &[(usize, usize)],
96    out: &mut Vec<RedundantDefinitionEntry>,
97) {
98    for m in RUST_ONE_LINE_WRAPPER_RE.captures_iter(source) {
99        let match_start = m.get(0).map(|m| m.start()).unwrap_or(0);
100        if range_contains(skip_ranges, match_start) {
101            continue;
102        }
103        let wrapper = m
104            .name("wrapper")
105            .map(|m| m.as_str().to_owned())
106            .unwrap_or_default();
107        let target = m
108            .name("target")
109            .map(|m| m.as_str().to_owned())
110            .unwrap_or_default();
111        if wrapper.is_empty() || target.is_empty() || wrapper == target {
112            continue;
113        }
114        let default_arg = m.name("default").map(|m| m.as_str().to_owned());
115        // Use the wrapper-name capture, not the whole match, so leading
116        // whitespace (including blank lines that `^\s*` swallows) does not
117        // shift the reported line backward.
118        let wrapper_start = m.name("wrapper").map(|m| m.start()).unwrap_or(match_start);
119        let line = byte_offset_to_line(source, wrapper_start);
120        out.push(RedundantDefinitionEntry {
121            file: file.to_owned(),
122            wrapper,
123            target,
124            line,
125            default_arg,
126            kind: "rust_one_line_wrapper",
127        });
128    }
129}
130
131fn range_contains(ranges: &[(usize, usize)], offset: usize) -> bool {
132    ranges
133        .iter()
134        .any(|(start, end)| offset >= *start && offset < *end)
135}
136
137fn byte_offset_to_line(source: &str, offset: usize) -> usize {
138    source[..offset.min(source.len())].matches('\n').count() + 1
139}
140
141fn is_rust_file(path: &Path) -> bool {
142    path.extension().and_then(|s| s.to_str()) == Some("rs")
143}
144
145/// Skip canonical test/example/bench harness files. Heuristic: any path
146/// segment named `tests`, `bench`, `benches`, `examples`, or any file
147/// ending in `_tests.rs`/`_test.rs`. Self-detector module is also
148/// skipped to avoid flagging its own fixture strings.
149fn is_test_file(relative: &str) -> bool {
150    if relative == "crates/codelens-engine/src/redundant_definitions.rs" {
151        return true;
152    }
153    let lower = relative.to_ascii_lowercase();
154    if lower.ends_with("_tests.rs") || lower.ends_with("_test.rs") {
155        return true;
156    }
157    lower.split('/').any(|seg| {
158        matches!(
159            seg,
160            "tests" | "test" | "bench" | "benches" | "examples" | "fixtures"
161        )
162    })
163}
164
165/// Returns byte ranges (start, end) for every `#[cfg(test)] mod NAME { ... }`
166/// block in `source`. Ranges are half-open: `[start, end)` covers the
167/// header through the closing `}` inclusive. Used by the scanner to skip
168/// matches that land inside test-only modules WITHOUT rewriting offsets,
169/// so reported line numbers stay accurate (Codex review on PR #149, P1).
170fn collect_cfg_test_ranges(source: &str) -> Vec<(usize, usize)> {
171    let mut ranges = Vec::new();
172    let mut idx = 0;
173    let bytes = source.as_bytes();
174    while idx < bytes.len() {
175        if bytes[idx] == b'#' {
176            let rest = &source[idx..];
177            if let Some(open_brace_offset) = match_cfg_test_mod_header(rest) {
178                let body_start = idx + open_brace_offset;
179                if let Some(body_end) = find_matching_brace(source, body_start) {
180                    let block_end = body_end + 1;
181                    ranges.push((idx, block_end));
182                    idx = block_end;
183                    continue;
184                }
185            }
186        }
187        idx += 1;
188    }
189    ranges
190}
191
192/// Removes `#[cfg(test)] mod ... { ... }` blocks (matched by regex with
193/// nested braces handled by depth counting). Retained for the unit test
194/// that still exercises the stripping behavior; the production scanner
195/// now uses `collect_cfg_test_ranges` to preserve line numbers.
196#[cfg_attr(not(test), allow(dead_code))]
197fn strip_cfg_test_modules(source: &str) -> String {
198    let mut out = String::with_capacity(source.len());
199    let mut chars = source.char_indices().peekable();
200    while let Some(&(idx, ch)) = chars.peek() {
201        if ch == '#' {
202            // try to match #[cfg(test)] mod NAME {
203            let rest = &source[idx..];
204            if let Some(open_brace_offset) = match_cfg_test_mod_header(rest) {
205                let body_start = idx + open_brace_offset;
206                let body_end = match find_matching_brace(source, body_start) {
207                    Some(e) => e,
208                    None => {
209                        out.push(ch);
210                        chars.next();
211                        continue;
212                    }
213                };
214                // skip over the entire `#[cfg(test)] mod NAME { ... }`
215                while let Some(&(j, _)) = chars.peek() {
216                    if j > body_end {
217                        break;
218                    }
219                    chars.next();
220                }
221                continue;
222            }
223        }
224        out.push(ch);
225        chars.next();
226    }
227    out
228}
229
230/// If `rest` starts with `#[cfg(test)] mod NAME {`, return the byte
231/// offset of the `{`. Otherwise None. Tolerant of attribute whitespace
232/// variants but not of comments inside the attribute.
233fn match_cfg_test_mod_header(rest: &str) -> Option<usize> {
234    static HEADER_RE: LazyLock<Regex> = LazyLock::new(|| {
235        Regex::new(r"^#\s*\[\s*cfg\s*\(\s*test\s*\)\s*\]\s*(?:pub(?:\([^)]*\))?\s+)?mod\s+[A-Za-z_][A-Za-z0-9_]*\s*\{")
236            .unwrap()
237    });
238    HEADER_RE.find(rest).map(|m| m.end() - 1)
239}
240
241/// Returns the byte index of the matching `}` for the `{` at `open_idx`.
242fn find_matching_brace(source: &str, open_idx: usize) -> Option<usize> {
243    let bytes = source.as_bytes();
244    if bytes.get(open_idx) != Some(&b'{') {
245        return None;
246    }
247    let mut depth = 1usize;
248    let mut i = open_idx + 1;
249    while i < bytes.len() {
250        match bytes[i] {
251            b'{' => depth += 1,
252            b'}' => {
253                depth -= 1;
254                if depth == 0 {
255                    return Some(i);
256                }
257            }
258            _ => {}
259        }
260        i += 1;
261    }
262    None
263}
264
265#[cfg(test)]
266mod tests {
267    use super::*;
268
269    #[test]
270    fn detects_self_dot_wrapper_with_none_default() {
271        let source = r#"
272impl Foo {
273    pub fn record_x(&self) { self.record_x_for_session(None) }
274    pub fn record_y(&self) { self.record_y_for_session(None); }
275}
276        "#;
277        let mut out = Vec::new();
278        scan_rust_source(source, "telemetry.rs", &[], &mut out);
279        assert_eq!(out.len(), 2, "got: {:?}", out);
280        assert_eq!(out[0].wrapper, "record_x");
281        assert_eq!(out[0].target, "record_x_for_session");
282        assert_eq!(out[0].default_arg.as_deref(), Some("None"));
283        assert_eq!(out[1].wrapper, "record_y");
284    }
285
286    #[test]
287    fn detects_bare_function_wrapper() {
288        let source = r#"
289pub fn helper(x: u32) -> bool { inner(x, false) }
290        "#;
291        let mut out = Vec::new();
292        scan_rust_source(source, "lib.rs", &[], &mut out);
293        assert_eq!(out.len(), 1);
294        assert_eq!(out[0].wrapper, "helper");
295        assert_eq!(out[0].target, "inner");
296        assert_eq!(out[0].default_arg.as_deref(), Some("false"));
297    }
298
299    #[test]
300    fn skips_zero_arg_passthrough_without_literal_default() {
301        // Codex P1 (PR #148): a wrapper with no literal default is just an
302        // alias, not the cleanup-target shape this detector exists for.
303        let source = r#"
304pub fn alias(x: u32) { inner(x) }
305pub fn passthrough(a: u32, b: u32) -> u32 { other(a, b) }
306        "#;
307        let mut out = Vec::new();
308        scan_rust_source(source, "x.rs", &[], &mut out);
309        assert!(
310            out.is_empty(),
311            "no-default forwards must not match: {:?}",
312            out
313        );
314    }
315
316    #[test]
317    fn line_numbers_reflect_original_source_after_cfg_test_skip() {
318        // Codex P1 (PR #149): when a `#[cfg(test)] mod` block precedes the
319        // production wrapper, the reported line must still match the
320        // original file — stripping the test block previously rewrote
321        // offsets and shifted every subsequent line.
322        // Fixture is 7 lines: cfg(test)+mod open at L1, body L2-L4, mod
323        // close at L5, blank L6, the production wrapper at L7.
324        let source = "#[cfg(test)]\nmod tests {\n    fn helper() {\n        foo(true, false);\n    }\n}\n\npub fn record_x(&self) { self.record_x_for_session(None) }\n";
325        let ranges = collect_cfg_test_ranges(source);
326        let mut out = Vec::new();
327        scan_rust_source(source, "events.rs", &ranges, &mut out);
328        assert_eq!(out.len(), 1, "got: {:?}", out);
329        assert_eq!(out[0].wrapper, "record_x");
330        assert_eq!(
331            out[0].line, 8,
332            "expected the wrapper to be reported at the original source line"
333        );
334    }
335
336    #[test]
337    fn skips_call_with_closure_arg() {
338        // Detector v3: a body that passes a closure to its delegate is NOT a thin
339        // wrapper — the closure carries logic. This was the false-positive shape
340        // that flagged `mutate_session_metrics` 13× during v1.13.3 dogfooding.
341        let source = r#"
342pub fn record_x(&self) {
343    self.mutate_session_metrics(None, |session| {
344        session.foo += 1;
345    });
346}
347        "#;
348        let mut out = Vec::new();
349        scan_rust_source(source, "events.rs", &[], &mut out);
350        assert!(out.is_empty(), "closure-arg call must not match: {:?}", out);
351    }
352
353    #[test]
354    fn skips_self_recursive_call() {
355        let source = r#"
356pub fn loop_me(&self) { self.loop_me(0) }
357        "#;
358        let mut out = Vec::new();
359        scan_rust_source(source, "x.rs", &[], &mut out);
360        // wrapper == target → not flagged (would be infinite recursion, not delegation)
361        assert!(out.is_empty(), "got: {:?}", out);
362    }
363
364    #[test]
365    fn strip_cfg_test_modules_removes_test_block() {
366        let source = r#"
367pub fn real_thing(&self) { self.real_thing_inner(None) }
368
369#[cfg(test)]
370mod tests {
371    fn helper() { foo(true) }
372}
373"#;
374        let stripped = super::strip_cfg_test_modules(source);
375        assert!(stripped.contains("real_thing"));
376        assert!(
377            !stripped.contains("foo(true)"),
378            "test mod should be gone: {}",
379            stripped
380        );
381    }
382
383    #[test]
384    fn is_test_file_recognizes_canonical_paths() {
385        assert!(super::is_test_file("crates/foo/tests/something.rs"));
386        assert!(super::is_test_file("crates/foo/src/internals_tests.rs"));
387        assert!(super::is_test_file("benchmarks/bench/runner.rs"));
388        assert!(!super::is_test_file("crates/foo/src/main.rs"));
389        assert!(super::is_test_file(
390            "crates/codelens-engine/src/redundant_definitions.rs"
391        ));
392    }
393
394    #[test]
395    #[ignore]
396    fn dogfood_self_repo() {
397        // Run with: cargo test -p codelens-engine dogfood_self_repo -- --ignored --nocapture
398        let repo = std::env::var("CODELENS_REPO_ROOT")
399            .unwrap_or_else(|_| "/Users/bagjaeseog/codelens-mcp-plugin".to_owned());
400        let project = crate::project::ProjectRoot::new(repo).expect("project root");
401        let results =
402            super::find_redundant_definitions(&project, 200).expect("find_redundant_definitions");
403        eprintln!(
404            "\n=== {} redundant definitions in self (post v2 filtering) ===\n",
405            results.len()
406        );
407        let mut groups: std::collections::BTreeMap<&str, Vec<&RedundantDefinitionEntry>> =
408            std::collections::BTreeMap::new();
409        for r in &results {
410            groups.entry(r.target.as_str()).or_default().push(r);
411        }
412        let mut multi: Vec<_> = groups.iter().filter(|(_, v)| v.len() >= 2).collect();
413        multi.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
414        eprintln!("Multi-wrapper clusters: {}\n", multi.len());
415        for (target, members) in &multi {
416            eprintln!("  {} ← {}", target, members.len());
417            for m in members.iter().take(3) {
418                eprintln!("      {} at {}:{}", m.wrapper, m.file, m.line);
419            }
420            if members.len() > 3 {
421                eprintln!("      ... +{} more", members.len() - 3);
422            }
423        }
424        eprintln!("\nFirst 30 hits:\n");
425        for r in results.iter().take(30) {
426            eprintln!("  {} -> {}  ({}:{})", r.wrapper, r.target, r.file, r.line);
427        }
428    }
429
430    #[test]
431    fn skips_multi_statement_body() {
432        let source = r#"
433pub fn complex(&self) {
434    let x = 1;
435    self.do_thing(x, None);
436}
437        "#;
438        let mut out = Vec::new();
439        scan_rust_source(source, "x.rs", &[], &mut out);
440        assert!(
441            out.is_empty(),
442            "multi-statement should not match: {:?}",
443            out
444        );
445    }
446}