Skip to main content

fallow_core/
cross_reference.rs

1//! Cross-reference duplication findings with dead code analysis results.
2//!
3//! When code is both duplicated AND unused, it's a higher-priority finding:
4//! the duplicate can be safely removed without any refactoring. This module
5//! identifies such combined findings.
6
7use rustc_hash::FxHashSet;
8use std::path::PathBuf;
9
10use serde::Serialize;
11
12use crate::duplicates::types::{CloneInstance, DuplicationReport};
13use crate::results::AnalysisResults;
14
/// A combined finding where a clone instance overlaps with a dead code issue.
///
/// `cross_reference` emits at most one of these per clone instance: either the
/// instance lives in an unused file, or an unused export/type is reported on a
/// line inside the instance's line range.
#[derive(Debug, Clone, Serialize)]
pub struct CombinedFinding {
    /// The clone instance that is also unused (an owned copy of the instance
    /// taken from the duplication report).
    pub clone_instance: CloneInstance,
    /// What kind of dead code overlaps with this clone.
    pub dead_code_kind: DeadCodeKind,
    /// Position of the parent clone group within the duplication report's
    /// `clone_groups` list (for associating the finding with its group).
    pub group_index: usize,
}
25
/// The type of dead code that overlaps with a clone instance.
///
/// `UnusedFile` is checked first by `cross_reference`; the export/type variants
/// are only produced for clones whose file is not itself reported as unused.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub enum DeadCodeKind {
    /// The entire file containing the clone is unused.
    UnusedFile,
    /// A specific unused export is reported on a line inside the clone's
    /// (inclusive) line range. `export_name` is the name of that export.
    UnusedExport { export_name: String },
    /// A specific unused type is reported on a line inside the clone's
    /// (inclusive) line range. `type_name` is the exported name of that type.
    UnusedType { type_name: String },
}
36
/// Result of cross-referencing duplication with dead code analysis.
///
/// The two counters partition `combined_findings`: every finding increments
/// exactly one of them.
#[derive(Debug, Clone, Serialize)]
pub struct CrossReferenceResult {
    /// Clone instances that are also dead code (safe to delete).
    pub combined_findings: Vec<CombinedFinding>,
    /// Number of clone instances in unused files.
    pub clones_in_unused_files: usize,
    /// Number of clone instances overlapping an unused export. Despite the
    /// name, this also counts clones that overlap an unused *type*.
    pub clones_with_unused_exports: usize,
}
47
48/// Cross-reference duplication findings with dead code analysis results.
49///
50/// For each clone instance, checks whether:
51/// 1. The file is entirely unused (in `unused_files`)
52/// 2. An unused export/type at the same line range overlaps
53///
54/// Returns combined findings sorted by priority (unused files first, then exports).
55#[must_use]
56pub fn cross_reference(
57    duplication: &DuplicationReport,
58    dead_code: &AnalysisResults,
59) -> CrossReferenceResult {
60    // Build lookup sets for fast checking
61    let unused_files: FxHashSet<&PathBuf> =
62        dead_code.unused_files.iter().map(|f| &f.path).collect();
63
64    let mut combined_findings = Vec::new();
65    let mut clones_in_unused_files = 0usize;
66    let mut clones_with_unused_exports = 0usize;
67
68    for (group_idx, group) in duplication.clone_groups.iter().enumerate() {
69        for instance in &group.instances {
70            // Check 1: Is the file entirely unused?
71            if unused_files.contains(&instance.file) {
72                combined_findings.push(CombinedFinding {
73                    clone_instance: instance.clone(),
74                    dead_code_kind: DeadCodeKind::UnusedFile,
75                    group_index: group_idx,
76                });
77                clones_in_unused_files += 1;
78                continue; // No need to check exports if entire file is unused
79            }
80
81            // Check 2: Does an unused export/type overlap with this clone's line range?
82            if let Some(finding) = find_overlapping_unused_export(instance, group_idx, dead_code) {
83                clones_with_unused_exports += 1;
84                combined_findings.push(finding);
85            }
86        }
87    }
88
89    CrossReferenceResult {
90        combined_findings,
91        clones_in_unused_files,
92        clones_with_unused_exports,
93    }
94}
95
96/// Check if any unused export/type overlaps with the clone instance's line range.
97fn find_overlapping_unused_export(
98    instance: &CloneInstance,
99    group_index: usize,
100    dead_code: &AnalysisResults,
101) -> Option<CombinedFinding> {
102    // Check unused exports
103    for export in &dead_code.unused_exports {
104        if export.path == instance.file
105            && (export.line as usize) >= instance.start_line
106            && (export.line as usize) <= instance.end_line
107        {
108            return Some(CombinedFinding {
109                clone_instance: instance.clone(),
110                dead_code_kind: DeadCodeKind::UnusedExport {
111                    export_name: export.export_name.clone(),
112                },
113                group_index,
114            });
115        }
116    }
117
118    // Check unused types
119    for type_export in &dead_code.unused_types {
120        if type_export.path == instance.file
121            && (type_export.line as usize) >= instance.start_line
122            && (type_export.line as usize) <= instance.end_line
123        {
124            return Some(CombinedFinding {
125                clone_instance: instance.clone(),
126                dead_code_kind: DeadCodeKind::UnusedType {
127                    type_name: type_export.export_name.clone(),
128                },
129                group_index,
130            });
131        }
132    }
133
134    None
135}
136
137/// Summary statistics for cross-referenced findings.
138impl CrossReferenceResult {
139    /// Total number of combined findings.
140    #[must_use]
141    pub const fn total(&self) -> usize {
142        self.combined_findings.len()
143    }
144
145    /// Whether any combined findings exist.
146    #[must_use]
147    pub const fn has_findings(&self) -> bool {
148        !self.combined_findings.is_empty()
149    }
150
151    /// Get clone groups that have at least one combined finding, with their indices.
152    #[must_use]
153    pub fn affected_group_indices(&self) -> FxHashSet<usize> {
154        self.combined_findings
155            .iter()
156            .map(|f| f.group_index)
157            .collect()
158    }
159}
160
#[cfg(test)]
mod tests {
    use super::*;
    use crate::duplicates::CloneGroup;
    use crate::results::{UnusedExport, UnusedFile};

    /// Build a clone instance covering `start..=end` in `file`; columns and
    /// fragment text are irrelevant to cross-referencing and left empty.
    fn make_instance(file: &str, start: usize, end: usize) -> CloneInstance {
        CloneInstance {
            file: PathBuf::from(file),
            start_line: start,
            end_line: end,
            start_col: 0,
            end_col: 0,
            fragment: String::new(),
        }
    }

    /// Wrap instances in a clone group with placeholder size metrics.
    fn make_group(instances: Vec<CloneInstance>) -> CloneGroup {
        CloneGroup {
            instances,
            token_count: 50,
            line_count: 10,
        }
    }

    /// Build a two-instance clone group spanning the same line range in two files.
    fn make_pair(first: &str, second: &str, start: usize, end: usize) -> CloneGroup {
        make_group(vec![
            make_instance(first, start, end),
            make_instance(second, start, end),
        ])
    }

    /// Build a duplication report holding only the given clone groups.
    ///
    /// `cross_reference` reads nothing but `clone_groups`, so the statistics
    /// are left at their defaults instead of being spelled out in every test.
    fn make_report(clone_groups: Vec<CloneGroup>) -> DuplicationReport {
        DuplicationReport {
            clone_groups,
            clone_families: vec![],
            stats: crate::duplicates::types::DuplicationStats::default(),
        }
    }

    /// Build an unused-file entry for `file`.
    fn make_unused_file(file: &str) -> UnusedFile {
        UnusedFile {
            path: PathBuf::from(file),
        }
    }

    /// Build a non-type, non-re-export unused export at line 0 of `file`.
    ///
    /// Tests override `line` (and `is_type_only` where needed) with struct
    /// update syntax so each test states only the fields it cares about.
    fn make_unused_export(file: &str, name: &str) -> UnusedExport {
        UnusedExport {
            path: PathBuf::from(file),
            export_name: name.to_string(),
            is_type_only: false,
            line: 0,
            col: 0,
            span_start: 0,
            is_re_export: false,
        }
    }

    #[test]
    fn empty_inputs_produce_no_findings() {
        let duplication = make_report(vec![]);
        let dead_code = AnalysisResults::default();

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
        assert_eq!(result.total(), 0);
    }

    #[test]
    fn detects_clone_in_unused_file() {
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 1, 10)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(make_unused_file("src/a.ts"));

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert_eq!(result.clones_in_unused_files, 1);
        assert_eq!(
            result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedFile
        );
    }

    #[test]
    fn detects_clone_overlapping_unused_export() {
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            line: 5,
            ..make_unused_export("src/a.ts", "processData")
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert_eq!(result.clones_with_unused_exports, 1);
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedExport { export_name } if export_name == "processData"
        ));
    }

    #[test]
    fn no_findings_when_no_overlap() {
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        // Unused export on a different line range
        dead_code.unused_exports.push(UnusedExport {
            line: 20, // outside clone range 5-15
            ..make_unused_export("src/a.ts", "other")
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }

    #[test]
    fn affected_group_indices() {
        let duplication = make_report(vec![
            make_pair("src/a.ts", "src/b.ts", 1, 10),
            make_pair("src/c.ts", "src/d.ts", 1, 10),
        ]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(make_unused_file("src/c.ts"));

        let result = cross_reference(&duplication, &dead_code);
        let affected = result.affected_group_indices();
        assert!(!affected.contains(&0)); // Group 0 not affected
        assert!(affected.contains(&1)); // Group 1 has clone in unused file
    }

    #[test]
    fn unused_file_takes_priority_over_export() {
        // If a file is unused AND has unused exports, we should only get the
        // UnusedFile finding (not both), because the continue skips export checks.
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(make_unused_file("src/a.ts"));
        dead_code.unused_exports.push(UnusedExport {
            line: 10,
            ..make_unused_export("src/a.ts", "foo")
        });

        let result = cross_reference(&duplication, &dead_code);
        // Only 1 finding for src/a.ts (the unused file), not 2
        let a_findings: Vec<_> = result
            .combined_findings
            .iter()
            .filter(|f| f.clone_instance.file == std::path::Path::new("src/a.ts"))
            .collect();
        assert_eq!(a_findings.len(), 1);
        assert_eq!(a_findings[0].dead_code_kind, DeadCodeKind::UnusedFile);
    }

    #[test]
    fn detects_clone_overlapping_unused_type() {
        let duplication = make_report(vec![make_pair("src/types.ts", "src/other.ts", 1, 20)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_types.push(UnusedExport {
            is_type_only: true,
            line: 10,
            ..make_unused_export("src/types.ts", "OldInterface")
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedType { type_name } if type_name == "OldInterface"
        ));
    }

    #[test]
    fn empty_result_methods() {
        let result = CrossReferenceResult {
            combined_findings: vec![],
            clones_in_unused_files: 0,
            clones_with_unused_exports: 0,
        };
        assert_eq!(result.total(), 0);
        assert!(!result.has_findings());
        assert!(result.affected_group_indices().is_empty());
    }

    #[test]
    fn multiple_groups_with_findings() {
        let duplication = make_report(vec![
            make_pair("src/a.ts", "src/b.ts", 1, 10),
            make_pair("src/c.ts", "src/d.ts", 5, 15),
            make_pair("src/e.ts", "src/f.ts", 1, 10),
        ]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(make_unused_file("src/a.ts"));
        dead_code.unused_exports.push(UnusedExport {
            line: 10,
            ..make_unused_export("src/c.ts", "helper")
        });

        let result = cross_reference(&duplication, &dead_code);
        assert_eq!(result.total(), 2);
        assert_eq!(result.clones_in_unused_files, 1);
        assert_eq!(result.clones_with_unused_exports, 1);

        let affected = result.affected_group_indices();
        assert!(affected.contains(&0)); // Group 0 has clone in unused file
        assert!(affected.contains(&1)); // Group 1 has clone overlapping unused export
        assert!(!affected.contains(&2)); // Group 2 unaffected
    }

    #[test]
    fn clone_instance_outside_export_line_range() {
        // Clone instance at lines 1-5, unused export at line 10
        // They don't overlap, so no finding
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 1, 5)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            line: 10,
            ..make_unused_export("src/a.ts", "fn")
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }

    #[test]
    fn clone_in_different_file_than_unused_export() {
        // Clone is in src/a.ts, unused export is in src/x.ts
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            line: 10,
            ..make_unused_export("src/x.ts", "fn") // different file
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }
}