Skip to main content

fallow_core/
cross_reference.rs

1//! Cross-reference duplication findings with dead code analysis results.
2//!
3//! When code is both duplicated AND unused, it's a higher-priority finding:
4//! the duplicate can be safely removed without any refactoring. This module
5//! identifies such combined findings.
6
7use rustc_hash::FxHashSet;
8use std::path::PathBuf;
9
10use serde::Serialize;
11
12use crate::duplicates::types::{CloneInstance, DuplicationReport};
13use crate::results::AnalysisResults;
14
15/// A combined finding where a clone instance overlaps with a dead code issue.
16#[derive(Debug, Clone, Serialize)]
17pub struct CombinedFinding {
18    /// The clone instance that is also unused.
19    pub clone_instance: CloneInstance,
20    /// What kind of dead code overlaps with this clone.
21    pub dead_code_kind: DeadCodeKind,
22    /// Clone group index (for associating with the parent group).
23    pub group_index: usize,
24}
25
26/// The type of dead code that overlaps with a clone instance.
27#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
28pub enum DeadCodeKind {
29    /// The entire file containing the clone is unused.
30    UnusedFile,
31    /// A specific unused export overlaps with the clone's line range.
32    UnusedExport { export_name: String },
33    /// A specific unused type overlaps with the clone's line range.
34    UnusedType { type_name: String },
35}
36
37/// Result of cross-referencing duplication with dead code analysis.
38#[derive(Debug, Clone, Serialize)]
39pub struct CrossReferenceResult {
40    /// Clone instances that are also dead code (safe to delete).
41    pub combined_findings: Vec<CombinedFinding>,
42    /// Number of clone instances in unused files.
43    pub clones_in_unused_files: usize,
44    /// Number of clone instances overlapping unused exports.
45    pub clones_with_unused_exports: usize,
46}
47
48/// Cross-reference duplication findings with dead code analysis results.
49///
50/// For each clone instance, checks whether:
51/// 1. The file is entirely unused (in `unused_files`)
52/// 2. An unused export/type at the same line range overlaps
53///
54/// Returns combined findings sorted by priority (unused files first, then exports).
55#[must_use]
56pub fn cross_reference(
57    duplication: &DuplicationReport,
58    dead_code: &AnalysisResults,
59) -> CrossReferenceResult {
60    // Build lookup sets for fast checking
61    let unused_files: FxHashSet<&PathBuf> =
62        dead_code.unused_files.iter().map(|f| &f.path).collect();
63
64    let mut combined_findings = Vec::new();
65    let mut clones_in_unused_files = 0usize;
66    let mut clones_with_unused_exports = 0usize;
67
68    for (group_idx, group) in duplication.clone_groups.iter().enumerate() {
69        for instance in &group.instances {
70            // Check 1: Is the file entirely unused?
71            if unused_files.contains(&instance.file) {
72                combined_findings.push(CombinedFinding {
73                    clone_instance: instance.clone(),
74                    dead_code_kind: DeadCodeKind::UnusedFile,
75                    group_index: group_idx,
76                });
77                clones_in_unused_files += 1;
78                continue; // No need to check exports if entire file is unused
79            }
80
81            // Check 2: Does an unused export/type overlap with this clone's line range?
82            if let Some(finding) = find_overlapping_unused_export(instance, group_idx, dead_code) {
83                clones_with_unused_exports += 1;
84                combined_findings.push(finding);
85            }
86        }
87    }
88
89    CrossReferenceResult {
90        combined_findings,
91        clones_in_unused_files,
92        clones_with_unused_exports,
93    }
94}
95
96/// Check if any unused export/type overlaps with the clone instance's line range.
97fn find_overlapping_unused_export(
98    instance: &CloneInstance,
99    group_index: usize,
100    dead_code: &AnalysisResults,
101) -> Option<CombinedFinding> {
102    // Check unused exports
103    for export in &dead_code.unused_exports {
104        if export.path == instance.file
105            && (export.line as usize) >= instance.start_line
106            && (export.line as usize) <= instance.end_line
107        {
108            return Some(CombinedFinding {
109                clone_instance: instance.clone(),
110                dead_code_kind: DeadCodeKind::UnusedExport {
111                    export_name: export.export_name.clone(),
112                },
113                group_index,
114            });
115        }
116    }
117
118    // Check unused types
119    for type_export in &dead_code.unused_types {
120        if type_export.path == instance.file
121            && (type_export.line as usize) >= instance.start_line
122            && (type_export.line as usize) <= instance.end_line
123        {
124            return Some(CombinedFinding {
125                clone_instance: instance.clone(),
126                dead_code_kind: DeadCodeKind::UnusedType {
127                    type_name: type_export.export_name.clone(),
128                },
129                group_index,
130            });
131        }
132    }
133
134    None
135}
136
137/// Summary statistics for cross-referenced findings.
138impl CrossReferenceResult {
139    /// Total number of combined findings.
140    #[must_use]
141    pub const fn total(&self) -> usize {
142        self.combined_findings.len()
143    }
144
145    /// Whether any combined findings exist.
146    #[must_use]
147    pub const fn has_findings(&self) -> bool {
148        !self.combined_findings.is_empty()
149    }
150
151    /// Get clone groups that have at least one combined finding, with their indices.
152    #[must_use]
153    pub fn affected_group_indices(&self) -> FxHashSet<usize> {
154        self.combined_findings
155            .iter()
156            .map(|f| f.group_index)
157            .collect()
158    }
159}
160
#[cfg(test)]
mod tests {
    use super::*;
    use crate::duplicates::CloneGroup;
    use crate::results::{UnusedExport, UnusedFile};

    /// Build a clone instance covering `start..=end` in `file`.
    fn make_instance(file: &str, start: usize, end: usize) -> CloneInstance {
        CloneInstance {
            file: PathBuf::from(file),
            start_line: start,
            end_line: end,
            start_col: 0,
            end_col: 0,
            fragment: String::new(),
        }
    }

    /// Wrap instances in a clone group with placeholder token/line counts.
    fn make_group(instances: Vec<CloneInstance>) -> CloneGroup {
        CloneGroup {
            instances,
            token_count: 50,
            line_count: 10,
        }
    }

    /// Build a two-instance clone group spanning the same line range in two files.
    fn make_pair_group(first: &str, second: &str, start: usize, end: usize) -> CloneGroup {
        make_group(vec![
            make_instance(first, start, end),
            make_instance(second, start, end),
        ])
    }

    /// Build a duplication report around `clone_groups`.
    ///
    /// `cross_reference` only reads `clone_groups`, so the statistics are left at
    /// their defaults instead of being spelled out in every test.
    fn make_report(clone_groups: Vec<CloneGroup>) -> DuplicationReport {
        DuplicationReport {
            clone_groups,
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: crate::duplicates::types::DuplicationStats::default(),
        }
    }

    #[test]
    fn empty_inputs_produce_no_findings() {
        let duplication = make_report(vec![]);
        let dead_code = AnalysisResults::default();

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
        assert_eq!(result.total(), 0);
    }

    #[test]
    fn detects_clone_in_unused_file() {
        let duplication = make_report(vec![make_pair_group("src/a.ts", "src/b.ts", 1, 10)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(UnusedFile {
            path: PathBuf::from("src/a.ts"),
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert_eq!(result.clones_in_unused_files, 1);
        assert_eq!(
            result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedFile
        );
    }

    #[test]
    fn detects_clone_overlapping_unused_export() {
        let duplication = make_report(vec![make_pair_group("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "processData".to_string(),
            is_type_only: false,
            line: 5,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert_eq!(result.clones_with_unused_exports, 1);
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedExport { export_name } if export_name == "processData"
        ));
    }

    #[test]
    fn export_on_last_clone_line_counts_as_overlap() {
        // The line range is inclusive on both ends, so an export declared on
        // the clone's final line still overlaps.
        let duplication = make_report(vec![make_pair_group("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "tail".to_string(),
            is_type_only: false,
            line: 15,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert_eq!(result.total(), 1);
        assert_eq!(result.clones_with_unused_exports, 1);
    }

    #[test]
    fn no_findings_when_no_overlap() {
        let duplication = make_report(vec![make_pair_group("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        // Unused export on a different line range
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "other".to_string(),
            is_type_only: false,
            line: 20, // outside clone range 5-15
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }

    #[test]
    fn affected_group_indices() {
        let duplication = make_report(vec![
            make_pair_group("src/a.ts", "src/b.ts", 1, 10),
            make_pair_group("src/c.ts", "src/d.ts", 1, 10),
        ]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(UnusedFile {
            path: PathBuf::from("src/c.ts"),
        });

        let result = cross_reference(&duplication, &dead_code);
        let affected = result.affected_group_indices();
        assert!(!affected.contains(&0)); // Group 0 not affected
        assert!(affected.contains(&1)); // Group 1 has clone in unused file
    }

    #[test]
    fn unused_file_takes_priority_over_export() {
        // If a file is unused AND has unused exports, we should only get the
        // UnusedFile finding (not both), because export checks are skipped for
        // instances in unused files.
        let duplication = make_report(vec![make_pair_group("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(UnusedFile {
            path: PathBuf::from("src/a.ts"),
        });
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "foo".to_string(),
            is_type_only: false,
            line: 10,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        // Only 1 finding for src/a.ts (the unused file), not 2
        let a_findings: Vec<_> = result
            .combined_findings
            .iter()
            .filter(|f| f.clone_instance.file == std::path::Path::new("src/a.ts"))
            .collect();
        assert_eq!(a_findings.len(), 1);
        assert_eq!(a_findings[0].dead_code_kind, DeadCodeKind::UnusedFile);
    }

    #[test]
    fn detects_clone_overlapping_unused_type() {
        let duplication = make_report(vec![make_pair_group(
            "src/types.ts",
            "src/other.ts",
            1,
            20,
        )]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_types.push(UnusedExport {
            path: PathBuf::from("src/types.ts"),
            export_name: "OldInterface".to_string(),
            is_type_only: true,
            line: 10,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedType { type_name } if type_name == "OldInterface"
        ));
    }

    #[test]
    fn empty_result_methods() {
        let result = CrossReferenceResult {
            combined_findings: vec![],
            clones_in_unused_files: 0,
            clones_with_unused_exports: 0,
        };
        assert_eq!(result.total(), 0);
        assert!(!result.has_findings());
        assert!(result.affected_group_indices().is_empty());
    }

    #[test]
    fn multiple_groups_with_findings() {
        let duplication = make_report(vec![
            make_pair_group("src/a.ts", "src/b.ts", 1, 10),
            make_pair_group("src/c.ts", "src/d.ts", 5, 15),
            make_pair_group("src/e.ts", "src/f.ts", 1, 10),
        ]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(UnusedFile {
            path: PathBuf::from("src/a.ts"),
        });
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/c.ts"),
            export_name: "helper".to_string(),
            is_type_only: false,
            line: 10,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert_eq!(result.total(), 2);
        assert_eq!(result.clones_in_unused_files, 1);
        assert_eq!(result.clones_with_unused_exports, 1);

        let affected = result.affected_group_indices();
        assert!(affected.contains(&0)); // Group 0 has clone in unused file
        assert!(affected.contains(&1)); // Group 1 has clone overlapping unused export
        assert!(!affected.contains(&2)); // Group 2 unaffected
    }

    #[test]
    fn clone_instance_outside_export_line_range() {
        // Clone instance at lines 1-5, unused export at line 10
        // They don't overlap, so no finding
        let duplication = make_report(vec![make_pair_group("src/a.ts", "src/b.ts", 1, 5)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "fn".to_string(),
            is_type_only: false,
            line: 10,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }

    #[test]
    fn clone_in_different_file_than_unused_export() {
        // Clone is in src/a.ts, unused export is in src/x.ts
        let duplication = make_report(vec![make_pair_group("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/x.ts"), // different file
            export_name: "fn".to_string(),
            is_type_only: false,
            line: 10,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }
}