Skip to main content

codelens_engine/
redundant_definitions.rs

1//! Detects "thin wrapper" definitions — functions whose entire body is a
2//! single call to another function with one literal default argument
3//! (e.g. `pub fn record_X(&self) { self.record_X_for_session(None) }`).
4//!
5//! This is a syntactic-only detector that catches the exact pattern flagged
6//! by self-dogfooding in v1.13.0 Phase 1-A: 16 `record_*` wrappers all
7//! forwarding to their `_for_session(None)` substrate. The substrate could
8//! be flagged by `find_dead_code_v2` only AFTER the wrappers were removed,
9//! so this complement helps surface the cluster pre-deletion.
10
11use crate::project::{ProjectRoot, collect_files};
12use anyhow::Result;
13use regex::Regex;
14use serde::Serialize;
15use std::path::Path;
16use std::sync::LazyLock;
17
18/// Matches a Rust one-line wrapper:
19///   `pub fn NAME(args) [-> RetType] { self.OTHER(args, LITERAL) [;] }`
20///   `fn NAME(args) [-> RetType] { OTHER(args, LITERAL) [;] }`
21/// where LITERAL is one of: `None`, `Default::default()`, `false`, `true`,
22/// a bare integer literal, or a quoted string literal.
23static RUST_ONE_LINE_WRAPPER_RE: LazyLock<Regex> = LazyLock::new(|| {
24    // (?m): multiline (^/$ are line anchors).
25    // Inner args use [^){};|] to reject:
26    //   - `;` (statement boundary — would mean multi-statement body)
27    //   - `{` `}` (block — closure body, struct literal)
28    //   - `|` (closure pipe — `|x| ...` indicates non-trivial logic, not a literal default)
29    // Default group is REQUIRED (no trailing `?`): a wrapper without a literal
30    // default is just an alias / passthrough, not the cleanup-target shape this
31    // detector exists for. Codex review on PR #148 (P1).
32    Regex::new(
33        r#"(?m)^\s*(?:pub(?:\([^)]*\))?\s+)?fn\s+(?P<wrapper>[A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?:->\s*[^{]+?)?\s*\{\s*(?:self\.|Self::)?(?P<target>[A-Za-z_][A-Za-z0-9_]*)\s*\([^){};|]*?(?P<default>None|Default::default\(\)|true|false|-?\d+|"[^"]*")\s*\)\s*;?\s*\}\s*$"#
34    ).unwrap()
35});
36
37#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
38pub struct RedundantDefinitionEntry {
39    pub file: String,
40    pub wrapper: String,
41    pub target: String,
42    pub line: usize,
43    pub default_arg: Option<String>,
44    pub kind: &'static str,
45}
46
47/// Finds Rust one-line wrappers in the project.
48///
49/// Returns a list of (wrapper, target) pairs. Callers can group by `target`
50/// to find substrates with multiple wrappers — the highest-leverage cleanup
51/// opportunity per Phase 1-A's findings.
52pub fn find_redundant_definitions(
53    project: &ProjectRoot,
54    max_results: usize,
55) -> Result<Vec<RedundantDefinitionEntry>> {
56    let mut results: Vec<RedundantDefinitionEntry> = Vec::new();
57    let candidates = collect_files(project.as_path(), is_rust_file)?;
58
59    // Codex P2 (PR #148): scan every file, then sort and truncate. Mid-walk
60    // `break` produced unstable ordering — clusters could be split across
61    // truncation depending on file iteration order, hiding the highest-
62    // leverage targets that the dogfood loop relies on.
63    for path in &candidates {
64        let source = match std::fs::read_to_string(path) {
65            Ok(s) => s,
66            Err(_) => continue,
67        };
68        let relative = project.to_relative(path);
69        if is_test_file(&relative) {
70            continue;
71        }
72        // Codex P1 (PR #149): scan the original source so reported line
73        // numbers match the file the user actually opens. Skip matches that
74        // fall inside `#[cfg(test)] mod ...` ranges instead of stripping
75        // them — stripping rewrites line offsets and made every location
76        // shift after the first cfg(test) block.
77        let cfg_test_ranges = collect_cfg_test_ranges(&source);
78        scan_rust_source(&source, &relative, &cfg_test_ranges, &mut results);
79    }
80
81    results.sort_by(|a, b| {
82        a.target
83            .cmp(&b.target)
84            .then(a.file.cmp(&b.file))
85            .then(a.line.cmp(&b.line))
86    });
87    if max_results > 0 && results.len() > max_results {
88        results.truncate(max_results);
89    }
90    Ok(results)
91}
92
93fn scan_rust_source(
94    source: &str,
95    file: &str,
96    skip_ranges: &[(usize, usize)],
97    out: &mut Vec<RedundantDefinitionEntry>,
98) {
99    for m in RUST_ONE_LINE_WRAPPER_RE.captures_iter(source) {
100        let match_start = m.get(0).map(|m| m.start()).unwrap_or(0);
101        if range_contains(skip_ranges, match_start) {
102            continue;
103        }
104        let wrapper = m
105            .name("wrapper")
106            .map(|m| m.as_str().to_owned())
107            .unwrap_or_default();
108        let target = m
109            .name("target")
110            .map(|m| m.as_str().to_owned())
111            .unwrap_or_default();
112        if wrapper.is_empty() || target.is_empty() || wrapper == target {
113            continue;
114        }
115        let default_arg = m.name("default").map(|m| m.as_str().to_owned());
116        // Use the wrapper-name capture, not the whole match, so leading
117        // whitespace (including blank lines that `^\s*` swallows) does not
118        // shift the reported line backward.
119        let wrapper_start = m.name("wrapper").map(|m| m.start()).unwrap_or(match_start);
120        let line = byte_offset_to_line(source, wrapper_start);
121        out.push(RedundantDefinitionEntry {
122            file: file.to_owned(),
123            wrapper,
124            target,
125            line,
126            default_arg,
127            kind: "rust_one_line_wrapper",
128        });
129    }
130}
131
132fn range_contains(ranges: &[(usize, usize)], offset: usize) -> bool {
133    ranges
134        .iter()
135        .any(|(start, end)| offset >= *start && offset < *end)
136}
137
138fn byte_offset_to_line(source: &str, offset: usize) -> usize {
139    source[..offset.min(source.len())].matches('\n').count() + 1
140}
141
142fn is_rust_file(path: &Path) -> bool {
143    path.extension().and_then(|s| s.to_str()) == Some("rs")
144}
145
146/// Skip canonical test/example/bench harness files. Heuristic: any path
147/// segment named `tests`, `bench`, `benches`, `examples`, or any file
148/// ending in `_tests.rs`/`_test.rs`. Self-detector module is also
149/// skipped to avoid flagging its own fixture strings.
150fn is_test_file(relative: &str) -> bool {
151    if relative == "crates/codelens-engine/src/redundant_definitions.rs" {
152        return true;
153    }
154    let lower = relative.to_ascii_lowercase();
155    if lower.ends_with("_tests.rs") || lower.ends_with("_test.rs") {
156        return true;
157    }
158    lower.split('/').any(|seg| {
159        matches!(
160            seg,
161            "tests" | "test" | "bench" | "benches" | "examples" | "fixtures"
162        )
163    })
164}
165
166/// Returns byte ranges (start, end) for every `#[cfg(test)] mod NAME { ... }`
167/// block in `source`. Ranges are half-open: `[start, end)` covers the
168/// header through the closing `}` inclusive. Used by the scanner to skip
169/// matches that land inside test-only modules WITHOUT rewriting offsets,
170/// so reported line numbers stay accurate (Codex review on PR #149, P1).
171fn collect_cfg_test_ranges(source: &str) -> Vec<(usize, usize)> {
172    let mut ranges = Vec::new();
173    let mut idx = 0;
174    let bytes = source.as_bytes();
175    while idx < bytes.len() {
176        if bytes[idx] == b'#' {
177            let rest = &source[idx..];
178            if let Some(open_brace_offset) = match_cfg_test_mod_header(rest) {
179                let body_start = idx + open_brace_offset;
180                if let Some(body_end) = find_matching_brace(source, body_start) {
181                    let block_end = body_end + 1;
182                    ranges.push((idx, block_end));
183                    idx = block_end;
184                    continue;
185                }
186            }
187        }
188        idx += 1;
189    }
190    ranges
191}
192
193/// Removes `#[cfg(test)] mod ... { ... }` blocks (matched by regex with
194/// nested braces handled by depth counting). Retained for the unit test
195/// that still exercises the stripping behavior; the production scanner
196/// now uses `collect_cfg_test_ranges` to preserve line numbers.
197#[cfg_attr(not(test), allow(dead_code))]
198fn strip_cfg_test_modules(source: &str) -> String {
199    let mut out = String::with_capacity(source.len());
200    let mut chars = source.char_indices().peekable();
201    while let Some(&(idx, ch)) = chars.peek() {
202        if ch == '#' {
203            // try to match #[cfg(test)] mod NAME {
204            let rest = &source[idx..];
205            if let Some(open_brace_offset) = match_cfg_test_mod_header(rest) {
206                let body_start = idx + open_brace_offset;
207                let body_end = match find_matching_brace(source, body_start) {
208                    Some(e) => e,
209                    None => {
210                        out.push(ch);
211                        chars.next();
212                        continue;
213                    }
214                };
215                // skip over the entire `#[cfg(test)] mod NAME { ... }`
216                while let Some(&(j, _)) = chars.peek() {
217                    if j > body_end {
218                        break;
219                    }
220                    chars.next();
221                }
222                continue;
223            }
224        }
225        out.push(ch);
226        chars.next();
227    }
228    out
229}
230
231/// If `rest` starts with `#[cfg(test)] mod NAME {`, return the byte
232/// offset of the `{`. Otherwise None. Tolerant of attribute whitespace
233/// variants but not of comments inside the attribute.
234fn match_cfg_test_mod_header(rest: &str) -> Option<usize> {
235    static HEADER_RE: LazyLock<Regex> = LazyLock::new(|| {
236        Regex::new(r"^#\s*\[\s*cfg\s*\(\s*test\s*\)\s*\]\s*(?:pub(?:\([^)]*\))?\s+)?mod\s+[A-Za-z_][A-Za-z0-9_]*\s*\{")
237            .unwrap()
238    });
239    HEADER_RE.find(rest).map(|m| m.end() - 1)
240}
241
242/// Returns the byte index of the matching `}` for the `{` at `open_idx`.
243fn find_matching_brace(source: &str, open_idx: usize) -> Option<usize> {
244    let bytes = source.as_bytes();
245    if bytes.get(open_idx) != Some(&b'{') {
246        return None;
247    }
248    let mut depth = 1usize;
249    let mut i = open_idx + 1;
250    while i < bytes.len() {
251        match bytes[i] {
252            b'{' => depth += 1,
253            b'}' => {
254                depth -= 1;
255                if depth == 0 {
256                    return Some(i);
257                }
258            }
259            _ => {}
260        }
261        i += 1;
262    }
263    None
264}
265
266#[cfg(test)]
267mod tests {
268    use super::*;
269
270    #[test]
271    fn detects_self_dot_wrapper_with_none_default() {
272        let source = r#"
273impl Foo {
274    pub fn record_x(&self) { self.record_x_for_session(None) }
275    pub fn record_y(&self) { self.record_y_for_session(None); }
276}
277        "#;
278        let mut out = Vec::new();
279        scan_rust_source(source, "telemetry.rs", &[], &mut out);
280        assert_eq!(out.len(), 2, "got: {:?}", out);
281        assert_eq!(out[0].wrapper, "record_x");
282        assert_eq!(out[0].target, "record_x_for_session");
283        assert_eq!(out[0].default_arg.as_deref(), Some("None"));
284        assert_eq!(out[1].wrapper, "record_y");
285    }
286
287    #[test]
288    fn detects_bare_function_wrapper() {
289        let source = r#"
290pub fn helper(x: u32) -> bool { inner(x, false) }
291        "#;
292        let mut out = Vec::new();
293        scan_rust_source(source, "lib.rs", &[], &mut out);
294        assert_eq!(out.len(), 1);
295        assert_eq!(out[0].wrapper, "helper");
296        assert_eq!(out[0].target, "inner");
297        assert_eq!(out[0].default_arg.as_deref(), Some("false"));
298    }
299
300    #[test]
301    fn skips_zero_arg_passthrough_without_literal_default() {
302        // Codex P1 (PR #148): a wrapper with no literal default is just an
303        // alias, not the cleanup-target shape this detector exists for.
304        let source = r#"
305pub fn alias(x: u32) { inner(x) }
306pub fn passthrough(a: u32, b: u32) -> u32 { other(a, b) }
307        "#;
308        let mut out = Vec::new();
309        scan_rust_source(source, "x.rs", &[], &mut out);
310        assert!(
311            out.is_empty(),
312            "no-default forwards must not match: {:?}",
313            out
314        );
315    }
316
317    #[test]
318    fn line_numbers_reflect_original_source_after_cfg_test_skip() {
319        // Codex P1 (PR #149): when a `#[cfg(test)] mod` block precedes the
320        // production wrapper, the reported line must still match the
321        // original file — stripping the test block previously rewrote
322        // offsets and shifted every subsequent line.
323        // Fixture is 7 lines: cfg(test)+mod open at L1, body L2-L4, mod
324        // close at L5, blank L6, the production wrapper at L7.
325        let source = "#[cfg(test)]\nmod tests {\n    fn helper() {\n        foo(true, false);\n    }\n}\n\npub fn record_x(&self) { self.record_x_for_session(None) }\n";
326        let ranges = collect_cfg_test_ranges(source);
327        let mut out = Vec::new();
328        scan_rust_source(source, "events.rs", &ranges, &mut out);
329        assert_eq!(out.len(), 1, "got: {:?}", out);
330        assert_eq!(out[0].wrapper, "record_x");
331        assert_eq!(
332            out[0].line, 8,
333            "expected the wrapper to be reported at the original source line"
334        );
335    }
336
337    #[test]
338    fn skips_call_with_closure_arg() {
339        // Detector v3: a body that passes a closure to its delegate is NOT a thin
340        // wrapper — the closure carries logic. This was the false-positive shape
341        // that flagged `mutate_session_metrics` 13× during v1.13.3 dogfooding.
342        let source = r#"
343pub fn record_x(&self) {
344    self.mutate_session_metrics(None, |session| {
345        session.foo += 1;
346    });
347}
348        "#;
349        let mut out = Vec::new();
350        scan_rust_source(source, "events.rs", &[], &mut out);
351        assert!(out.is_empty(), "closure-arg call must not match: {:?}", out);
352    }
353
354    #[test]
355    fn skips_self_recursive_call() {
356        let source = r#"
357pub fn loop_me(&self) { self.loop_me(0) }
358        "#;
359        let mut out = Vec::new();
360        scan_rust_source(source, "x.rs", &[], &mut out);
361        // wrapper == target → not flagged (would be infinite recursion, not delegation)
362        assert!(out.is_empty(), "got: {:?}", out);
363    }
364
365    #[test]
366    fn strip_cfg_test_modules_removes_test_block() {
367        let source = r#"
368pub fn real_thing(&self) { self.real_thing_inner(None) }
369
370#[cfg(test)]
371mod tests {
372    fn helper() { foo(true) }
373}
374"#;
375        let stripped = super::strip_cfg_test_modules(source);
376        assert!(stripped.contains("real_thing"));
377        assert!(
378            !stripped.contains("foo(true)"),
379            "test mod should be gone: {}",
380            stripped
381        );
382    }
383
384    #[test]
385    fn is_test_file_recognizes_canonical_paths() {
386        assert!(super::is_test_file("crates/foo/tests/something.rs"));
387        assert!(super::is_test_file("crates/foo/src/internals_tests.rs"));
388        assert!(super::is_test_file("benchmarks/bench/runner.rs"));
389        assert!(!super::is_test_file("crates/foo/src/main.rs"));
390        assert!(super::is_test_file(
391            "crates/codelens-engine/src/redundant_definitions.rs"
392        ));
393    }
394
395    #[test]
396    #[ignore]
397    fn dogfood_self_repo() {
398        // Run with: cargo test -p codelens-engine dogfood_self_repo -- --ignored --nocapture
399        // Codex P2 (PR #149): derive workspace root from CARGO_MANIFEST_DIR
400        // (the engine crate's directory) → parent twice → repo root. Any
401        // contributor's clone path works without hardcoding `/Users/...`.
402        let repo = std::env::var("CODELENS_REPO_ROOT").unwrap_or_else(|_| {
403            std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
404                .ancestors()
405                .nth(2)
406                .expect("workspace root not found above CARGO_MANIFEST_DIR")
407                .to_string_lossy()
408                .into_owned()
409        });
410        let project = crate::project::ProjectRoot::new(repo).expect("project root");
411        let results =
412            super::find_redundant_definitions(&project, 200).expect("find_redundant_definitions");
413        eprintln!(
414            "\n=== {} redundant definitions in self (post v2 filtering) ===\n",
415            results.len()
416        );
417        let mut groups: std::collections::BTreeMap<&str, Vec<&RedundantDefinitionEntry>> =
418            std::collections::BTreeMap::new();
419        for r in &results {
420            groups.entry(r.target.as_str()).or_default().push(r);
421        }
422        let mut multi: Vec<_> = groups.iter().filter(|(_, v)| v.len() >= 2).collect();
423        multi.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
424        eprintln!("Multi-wrapper clusters: {}\n", multi.len());
425        for (target, members) in &multi {
426            eprintln!("  {} ← {}", target, members.len());
427            for m in members.iter().take(3) {
428                eprintln!("      {} at {}:{}", m.wrapper, m.file, m.line);
429            }
430            if members.len() > 3 {
431                eprintln!("      ... +{} more", members.len() - 3);
432            }
433        }
434        eprintln!("\nFirst 30 hits:\n");
435        for r in results.iter().take(30) {
436            eprintln!("  {} -> {}  ({}:{})", r.wrapper, r.target, r.file, r.line);
437        }
438    }
439
440    #[test]
441    fn skips_multi_statement_body() {
442        let source = r#"
443pub fn complex(&self) {
444    let x = 1;
445    self.do_thing(x, None);
446}
447        "#;
448        let mut out = Vec::new();
449        scan_rust_source(source, "x.rs", &[], &mut out);
450        assert!(
451            out.is_empty(),
452            "multi-statement should not match: {:?}",
453            out
454        );
455    }
456}