Skip to main content

fallow_core/
cross_reference.rs

1//! Cross-reference duplication findings with dead code analysis results.
2//!
3//! When code is both duplicated AND unused, it's a higher-priority finding:
4//! the duplicate can be safely removed without any refactoring. This module
5//! identifies such combined findings.
6
7use rustc_hash::FxHashSet;
8use std::path::PathBuf;
9
10use serde::Serialize;
11
12use crate::duplicates::types::{CloneInstance, DuplicationReport};
13use crate::results::AnalysisResults;
14
15/// A combined finding where a clone instance overlaps with a dead code issue.
16#[derive(Debug, Clone, Serialize)]
17pub struct CombinedFinding {
18    /// The clone instance that is also unused.
19    pub clone_instance: CloneInstance,
20    /// What kind of dead code overlaps with this clone.
21    pub dead_code_kind: DeadCodeKind,
22    /// Clone group index (for associating with the parent group).
23    pub group_index: usize,
24}
25
26/// The type of dead code that overlaps with a clone instance.
27#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
28pub enum DeadCodeKind {
29    /// The entire file containing the clone is unused.
30    UnusedFile,
31    /// A specific unused export overlaps with the clone's line range.
32    UnusedExport { export_name: String },
33    /// A specific unused type overlaps with the clone's line range.
34    UnusedType { type_name: String },
35}
36
37/// Result of cross-referencing duplication with dead code analysis.
38#[derive(Debug, Clone, Serialize)]
39pub struct CrossReferenceResult {
40    /// Clone instances that are also dead code (safe to delete).
41    pub combined_findings: Vec<CombinedFinding>,
42    /// Number of clone instances in unused files.
43    pub clones_in_unused_files: usize,
44    /// Number of clone instances overlapping unused exports.
45    pub clones_with_unused_exports: usize,
46}
47
48/// Cross-reference duplication findings with dead code analysis results.
49///
50/// For each clone instance, checks whether:
51/// 1. The file is entirely unused (in `unused_files`)
52/// 2. An unused export/type at the same line range overlaps
53///
54/// Returns combined findings sorted by priority (unused files first, then exports).
55#[must_use]
56pub fn cross_reference(
57    duplication: &DuplicationReport,
58    dead_code: &AnalysisResults,
59) -> CrossReferenceResult {
60    // Build lookup sets for fast checking
61    let unused_files: FxHashSet<&PathBuf> = dead_code
62        .unused_files
63        .iter()
64        .map(|f| &f.file.path)
65        .collect();
66
67    let mut combined_findings = Vec::new();
68    let mut clones_in_unused_files = 0usize;
69    let mut clones_with_unused_exports = 0usize;
70
71    for (group_idx, group) in duplication.clone_groups.iter().enumerate() {
72        for instance in &group.instances {
73            // Check 1: Is the file entirely unused?
74            if unused_files.contains(&instance.file) {
75                combined_findings.push(CombinedFinding {
76                    clone_instance: instance.clone(),
77                    dead_code_kind: DeadCodeKind::UnusedFile,
78                    group_index: group_idx,
79                });
80                clones_in_unused_files += 1;
81                continue; // No need to check exports if entire file is unused
82            }
83
84            // Check 2: Does an unused export/type overlap with this clone's line range?
85            if let Some(finding) = find_overlapping_unused_export(instance, group_idx, dead_code) {
86                clones_with_unused_exports += 1;
87                combined_findings.push(finding);
88            }
89        }
90    }
91
92    CrossReferenceResult {
93        combined_findings,
94        clones_in_unused_files,
95        clones_with_unused_exports,
96    }
97}
98
99/// Check if any unused export/type overlaps with the clone instance's line range.
100fn find_overlapping_unused_export(
101    instance: &CloneInstance,
102    group_index: usize,
103    dead_code: &AnalysisResults,
104) -> Option<CombinedFinding> {
105    // Check unused exports
106    for export in &dead_code.unused_exports {
107        if export.export.path == instance.file
108            && (export.export.line as usize) >= instance.start_line
109            && (export.export.line as usize) <= instance.end_line
110        {
111            return Some(CombinedFinding {
112                clone_instance: instance.clone(),
113                dead_code_kind: DeadCodeKind::UnusedExport {
114                    export_name: export.export.export_name.clone(),
115                },
116                group_index,
117            });
118        }
119    }
120
121    // Check unused types
122    for type_export in &dead_code.unused_types {
123        if type_export.export.path == instance.file
124            && (type_export.export.line as usize) >= instance.start_line
125            && (type_export.export.line as usize) <= instance.end_line
126        {
127            return Some(CombinedFinding {
128                clone_instance: instance.clone(),
129                dead_code_kind: DeadCodeKind::UnusedType {
130                    type_name: type_export.export.export_name.clone(),
131                },
132                group_index,
133            });
134        }
135    }
136
137    None
138}
139
140/// Summary statistics for cross-referenced findings.
141impl CrossReferenceResult {
142    /// Total number of combined findings.
143    #[must_use]
144    pub const fn total(&self) -> usize {
145        self.combined_findings.len()
146    }
147
148    /// Whether any combined findings exist.
149    #[must_use]
150    pub const fn has_findings(&self) -> bool {
151        !self.combined_findings.is_empty()
152    }
153
154    /// Get clone groups that have at least one combined finding, with their indices.
155    #[must_use]
156    pub fn affected_group_indices(&self) -> FxHashSet<usize> {
157        self.combined_findings
158            .iter()
159            .map(|f| f.group_index)
160            .collect()
161    }
162}
163
#[cfg(test)]
mod tests {
    use super::*;
    use crate::duplicates::CloneGroup;
    use crate::duplicates::types::DuplicationStats;
    use crate::results::{UnusedExport, UnusedFile};
    use fallow_types::output_dead_code::{
        UnusedExportFinding, UnusedFileFinding, UnusedTypeFinding,
    };

    /// Clone instance spanning lines `start..=end` of `file`.
    fn make_instance(file: &str, start: usize, end: usize) -> CloneInstance {
        CloneInstance {
            file: PathBuf::from(file),
            start_line: start,
            end_line: end,
            start_col: 0,
            end_col: 0,
            fragment: String::new(),
        }
    }

    /// Clone group holding the given instances.
    fn make_group(instances: Vec<CloneInstance>) -> CloneGroup {
        CloneGroup {
            instances,
            token_count: 50,
            line_count: 10,
        }
    }

    /// Clone group with two instances sharing one line range in two files.
    fn make_pair(first: &str, second: &str, start: usize, end: usize) -> CloneGroup {
        make_group(vec![
            make_instance(first, start, end),
            make_instance(second, start, end),
        ])
    }

    /// Duplication report containing only the given clone groups.
    ///
    /// `cross_reference` reads nothing but `clone_groups`, so the statistics
    /// are left at their defaults.
    fn make_report(clone_groups: Vec<CloneGroup>) -> DuplicationReport {
        DuplicationReport {
            clone_groups,
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats::default(),
        }
    }

    /// Value export record for `export_name` in `path`, declared at line 0.
    ///
    /// Tests set the line (and `is_type_only` where needed) with struct-update
    /// syntax: `UnusedExport { line: 5, ..export_stub(path, name) }`.
    fn export_stub(path: &str, export_name: &str) -> UnusedExport {
        UnusedExport {
            path: PathBuf::from(path),
            export_name: export_name.to_string(),
            is_type_only: false,
            line: 0,
            col: 0,
            span_start: 0,
            is_re_export: false,
        }
    }

    /// Record `path` as an entirely unused file in `dead_code`.
    fn mark_file_unused(dead_code: &mut AnalysisResults, path: &str) {
        dead_code
            .unused_files
            .push(UnusedFileFinding::with_actions(UnusedFile {
                path: PathBuf::from(path),
            }));
    }

    #[test]
    fn empty_inputs_produce_no_findings() {
        let duplication = make_report(vec![]);
        let dead_code = AnalysisResults::default();

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
        assert_eq!(result.total(), 0);
    }

    #[test]
    fn detects_clone_in_unused_file() {
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 1, 10)]);
        let mut dead_code = AnalysisResults::default();
        mark_file_unused(&mut dead_code, "src/a.ts");

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert_eq!(result.clones_in_unused_files, 1);
        assert_eq!(
            result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedFile
        );
    }

    #[test]
    fn detects_clone_overlapping_unused_export() {
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        dead_code
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                line: 5,
                ..export_stub("src/a.ts", "processData")
            }));

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert_eq!(result.clones_with_unused_exports, 1);
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedExport { export_name } if export_name == "processData"
        ));
    }

    #[test]
    fn export_on_last_clone_line_counts_as_overlap() {
        // The upper bound of the clone's line range is inclusive.
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        dead_code
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                line: 15,
                ..export_stub("src/a.ts", "tail")
            }));

        let result = cross_reference(&duplication, &dead_code);
        assert_eq!(result.total(), 1);
        assert_eq!(result.clones_with_unused_exports, 1);
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedExport { export_name } if export_name == "tail"
        ));
    }

    #[test]
    fn no_findings_when_no_overlap() {
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        // Unused export on a different line range
        dead_code
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                line: 20, // outside clone range 5-15
                ..export_stub("src/a.ts", "other")
            }));

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }

    #[test]
    fn affected_group_indices() {
        let duplication = make_report(vec![
            make_pair("src/a.ts", "src/b.ts", 1, 10),
            make_pair("src/c.ts", "src/d.ts", 1, 10),
        ]);
        let mut dead_code = AnalysisResults::default();
        mark_file_unused(&mut dead_code, "src/c.ts");

        let result = cross_reference(&duplication, &dead_code);
        let affected = result.affected_group_indices();
        assert!(!affected.contains(&0)); // Group 0 not affected
        assert!(affected.contains(&1)); // Group 1 has clone in unused file
    }

    #[test]
    fn unused_file_takes_priority_over_export() {
        // If a file is unused AND has unused exports, we should only get the
        // UnusedFile finding (not both): export checks are skipped for
        // instances in unused files.
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        mark_file_unused(&mut dead_code, "src/a.ts");
        dead_code
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                line: 10,
                ..export_stub("src/a.ts", "foo")
            }));

        let result = cross_reference(&duplication, &dead_code);
        // Only 1 finding for src/a.ts (the unused file), not 2
        let a_findings: Vec<_> = result
            .combined_findings
            .iter()
            .filter(|f| f.clone_instance.file == std::path::Path::new("src/a.ts"))
            .collect();
        assert_eq!(a_findings.len(), 1);
        assert_eq!(a_findings[0].dead_code_kind, DeadCodeKind::UnusedFile);
    }

    #[test]
    fn detects_clone_overlapping_unused_type() {
        let duplication = make_report(vec![make_pair("src/types.ts", "src/other.ts", 1, 20)]);
        let mut dead_code = AnalysisResults::default();
        dead_code
            .unused_types
            .push(UnusedTypeFinding::with_actions(UnusedExport {
                is_type_only: true,
                line: 10,
                ..export_stub("src/types.ts", "OldInterface")
            }));

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedType { type_name } if type_name == "OldInterface"
        ));
    }

    #[test]
    fn empty_result_methods() {
        let result = CrossReferenceResult {
            combined_findings: vec![],
            clones_in_unused_files: 0,
            clones_with_unused_exports: 0,
        };
        assert_eq!(result.total(), 0);
        assert!(!result.has_findings());
        assert!(result.affected_group_indices().is_empty());
    }

    #[test]
    fn multiple_groups_with_findings() {
        let duplication = make_report(vec![
            make_pair("src/a.ts", "src/b.ts", 1, 10),
            make_pair("src/c.ts", "src/d.ts", 5, 15),
            make_pair("src/e.ts", "src/f.ts", 1, 10),
        ]);
        let mut dead_code = AnalysisResults::default();
        mark_file_unused(&mut dead_code, "src/a.ts");
        dead_code
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                line: 10,
                ..export_stub("src/c.ts", "helper")
            }));

        let result = cross_reference(&duplication, &dead_code);
        assert_eq!(result.total(), 2);
        assert_eq!(result.clones_in_unused_files, 1);
        assert_eq!(result.clones_with_unused_exports, 1);

        let affected = result.affected_group_indices();
        assert!(affected.contains(&0)); // Group 0 has clone in unused file
        assert!(affected.contains(&1)); // Group 1 has clone overlapping unused export
        assert!(!affected.contains(&2)); // Group 2 unaffected
    }

    #[test]
    fn clone_instance_outside_export_line_range() {
        // Clone instance at lines 1-5, unused export at line 10
        // They don't overlap, so no finding
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 1, 5)]);
        let mut dead_code = AnalysisResults::default();
        dead_code
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                line: 10,
                ..export_stub("src/a.ts", "fn")
            }));

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }

    #[test]
    fn clone_in_different_file_than_unused_export() {
        // Clone is in src/a.ts, unused export is in src/x.ts
        let duplication = make_report(vec![make_pair("src/a.ts", "src/b.ts", 5, 15)]);
        let mut dead_code = AnalysisResults::default();
        dead_code
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                line: 10,
                ..export_stub("src/x.ts", "fn") // different file
            }));

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }
}