Skip to main content

fallow_core/
cross_reference.rs

//! Cross-reference duplication findings with dead code analysis results.
//!
//! When code is both duplicated AND unused, it's a higher-priority finding:
//! the duplicate can be safely removed without any refactoring. This module
//! identifies such combined findings.
6
7use rustc_hash::FxHashSet;
8use std::path::PathBuf;
9
10use serde::Serialize;
11
12use crate::duplicates::types::{CloneInstance, DuplicationReport};
13use crate::results::AnalysisResults;
14
15/// A combined finding where a clone instance overlaps with a dead code issue.
16#[derive(Debug, Clone, Serialize)]
17pub struct CombinedFinding {
18    /// The clone instance that is also unused.
19    pub clone_instance: CloneInstance,
20    /// What kind of dead code overlaps with this clone.
21    pub dead_code_kind: DeadCodeKind,
22    /// Clone group index (for associating with the parent group).
23    pub group_index: usize,
24}
25
26/// The type of dead code that overlaps with a clone instance.
27#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
28pub enum DeadCodeKind {
29    /// The entire file containing the clone is unused.
30    UnusedFile,
31    /// A specific unused export overlaps with the clone's line range.
32    UnusedExport { export_name: String },
33    /// A specific unused type overlaps with the clone's line range.
34    UnusedType { type_name: String },
35}
36
37/// Result of cross-referencing duplication with dead code analysis.
38#[derive(Debug, Clone, Serialize)]
39pub struct CrossReferenceResult {
40    /// Clone instances that are also dead code (safe to delete).
41    pub combined_findings: Vec<CombinedFinding>,
42    /// Number of clone instances in unused files.
43    pub clones_in_unused_files: usize,
44    /// Number of clone instances overlapping unused exports.
45    pub clones_with_unused_exports: usize,
46}
47
48/// Cross-reference duplication findings with dead code analysis results.
49///
50/// For each clone instance, checks whether:
51/// 1. The file is entirely unused (in `unused_files`)
52/// 2. An unused export/type at the same line range overlaps
53///
54/// Returns combined findings sorted by priority (unused files first, then exports).
55#[must_use]
56pub fn cross_reference(
57    duplication: &DuplicationReport,
58    dead_code: &AnalysisResults,
59) -> CrossReferenceResult {
60    // Build lookup sets for fast checking
61    let unused_files: FxHashSet<&PathBuf> =
62        dead_code.unused_files.iter().map(|f| &f.path).collect();
63
64    let mut combined_findings = Vec::new();
65    let mut clones_in_unused_files = 0usize;
66    let mut clones_with_unused_exports = 0usize;
67
68    for (group_idx, group) in duplication.clone_groups.iter().enumerate() {
69        for instance in &group.instances {
70            // Check 1: Is the file entirely unused?
71            if unused_files.contains(&instance.file) {
72                combined_findings.push(CombinedFinding {
73                    clone_instance: instance.clone(),
74                    dead_code_kind: DeadCodeKind::UnusedFile,
75                    group_index: group_idx,
76                });
77                clones_in_unused_files += 1;
78                continue; // No need to check exports if entire file is unused
79            }
80
81            // Check 2: Does an unused export/type overlap with this clone's line range?
82            if let Some(finding) = find_overlapping_unused_export(instance, group_idx, dead_code) {
83                clones_with_unused_exports += 1;
84                combined_findings.push(finding);
85            }
86        }
87    }
88
89    CrossReferenceResult {
90        combined_findings,
91        clones_in_unused_files,
92        clones_with_unused_exports,
93    }
94}
95
96/// Check if any unused export/type overlaps with the clone instance's line range.
97fn find_overlapping_unused_export(
98    instance: &CloneInstance,
99    group_index: usize,
100    dead_code: &AnalysisResults,
101) -> Option<CombinedFinding> {
102    // Check unused exports
103    for export in &dead_code.unused_exports {
104        if export.path == instance.file
105            && (export.line as usize) >= instance.start_line
106            && (export.line as usize) <= instance.end_line
107        {
108            return Some(CombinedFinding {
109                clone_instance: instance.clone(),
110                dead_code_kind: DeadCodeKind::UnusedExport {
111                    export_name: export.export_name.clone(),
112                },
113                group_index,
114            });
115        }
116    }
117
118    // Check unused types
119    for type_export in &dead_code.unused_types {
120        if type_export.path == instance.file
121            && (type_export.line as usize) >= instance.start_line
122            && (type_export.line as usize) <= instance.end_line
123        {
124            return Some(CombinedFinding {
125                clone_instance: instance.clone(),
126                dead_code_kind: DeadCodeKind::UnusedType {
127                    type_name: type_export.export_name.clone(),
128                },
129                group_index,
130            });
131        }
132    }
133
134    None
135}
136
137/// Summary statistics for cross-referenced findings.
138impl CrossReferenceResult {
139    /// Total number of combined findings.
140    #[must_use]
141    pub const fn total(&self) -> usize {
142        self.combined_findings.len()
143    }
144
145    /// Whether any combined findings exist.
146    #[must_use]
147    pub const fn has_findings(&self) -> bool {
148        !self.combined_findings.is_empty()
149    }
150
151    /// Get clone groups that have at least one combined finding, with their indices.
152    #[must_use]
153    pub fn affected_group_indices(&self) -> FxHashSet<usize> {
154        self.combined_findings
155            .iter()
156            .map(|f| f.group_index)
157            .collect()
158    }
159}
160
#[cfg(test)]
mod tests {
    use super::*;
    use crate::duplicates::CloneGroup;
    use crate::duplicates::types::DuplicationStats;
    use crate::results::{UnusedExport, UnusedFile};

    /// Clone instance spanning lines `start..=end` of `file`.
    ///
    /// Columns and fragment text are placeholders; the code under test only
    /// looks at the file path and the line range.
    fn make_instance(file: &str, start: usize, end: usize) -> CloneInstance {
        CloneInstance {
            file: PathBuf::from(file),
            start_line: start,
            end_line: end,
            start_col: 0,
            end_col: 0,
            fragment: String::new(),
        }
    }

    /// Clone group wrapping `instances` with arbitrary size metadata.
    fn make_group(instances: Vec<CloneInstance>) -> CloneGroup {
        CloneGroup {
            instances,
            token_count: 50,
            line_count: 10,
        }
    }

    /// Duplication report that carries only `clone_groups`.
    ///
    /// `cross_reference` reads nothing but `clone_groups`, so families,
    /// mirrored directories and stats are left empty / default.
    fn make_report(clone_groups: Vec<CloneGroup>) -> DuplicationReport {
        DuplicationReport {
            clone_groups,
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats::default(),
        }
    }

    /// Report with one group of two instances, in `first` and `second`, both
    /// covering lines `start..=end`.
    fn make_pair_report(first: &str, second: &str, start: usize, end: usize) -> DuplicationReport {
        make_report(vec![make_group(vec![
            make_instance(first, start, end),
            make_instance(second, start, end),
        ])])
    }

    #[test]
    fn empty_inputs_produce_no_findings() {
        let duplication = make_report(vec![]);
        let dead_code = AnalysisResults::default();

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
        assert_eq!(result.total(), 0);
    }

    #[test]
    fn detects_clone_in_unused_file() {
        let duplication = make_pair_report("src/a.ts", "src/b.ts", 1, 10);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(UnusedFile {
            path: PathBuf::from("src/a.ts"),
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert_eq!(result.clones_in_unused_files, 1);
        assert_eq!(
            result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedFile
        );
    }

    #[test]
    fn detects_clone_overlapping_unused_export() {
        let duplication = make_pair_report("src/a.ts", "src/b.ts", 5, 15);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "processData".to_string(),
            is_type_only: false,
            line: 5,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        assert_eq!(result.clones_with_unused_exports, 1);
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedExport { export_name } if export_name == "processData"
        ));
    }

    #[test]
    fn export_on_last_line_of_clone_is_detected() {
        // The line range is inclusive on both ends, so an export declared on
        // the clone's final line still counts as overlapping.
        let duplication = make_pair_report("src/a.ts", "src/b.ts", 5, 15);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "tail".to_string(),
            is_type_only: false,
            line: 15,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert_eq!(result.total(), 1);
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedExport { export_name } if export_name == "tail"
        ));
    }

    #[test]
    fn no_findings_when_no_overlap() {
        let duplication = make_pair_report("src/a.ts", "src/b.ts", 5, 15);
        let mut dead_code = AnalysisResults::default();
        // Unused export on a different line range
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "other".to_string(),
            is_type_only: false,
            line: 20, // outside clone range 5-15
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }

    #[test]
    fn affected_group_indices() {
        let duplication = make_report(vec![
            make_group(vec![
                make_instance("src/a.ts", 1, 10),
                make_instance("src/b.ts", 1, 10),
            ]),
            make_group(vec![
                make_instance("src/c.ts", 1, 10),
                make_instance("src/d.ts", 1, 10),
            ]),
        ]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(UnusedFile {
            path: PathBuf::from("src/c.ts"),
        });

        let result = cross_reference(&duplication, &dead_code);
        let affected = result.affected_group_indices();
        assert!(!affected.contains(&0)); // Group 0 not affected
        assert!(affected.contains(&1)); // Group 1 has clone in unused file
    }

    #[test]
    fn unused_file_takes_priority_over_export() {
        // If a file is unused AND has unused exports, we should only get the
        // UnusedFile finding (not both), because the export check is skipped.
        let duplication = make_pair_report("src/a.ts", "src/b.ts", 5, 15);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(UnusedFile {
            path: PathBuf::from("src/a.ts"),
        });
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "foo".to_string(),
            is_type_only: false,
            line: 10,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        // Only 1 finding for src/a.ts (the unused file), not 2
        let a_findings: Vec<_> = result
            .combined_findings
            .iter()
            .filter(|f| f.clone_instance.file == std::path::Path::new("src/a.ts"))
            .collect();
        assert_eq!(a_findings.len(), 1);
        assert_eq!(a_findings[0].dead_code_kind, DeadCodeKind::UnusedFile);
    }

    #[test]
    fn detects_clone_overlapping_unused_type() {
        let duplication = make_pair_report("src/types.ts", "src/other.ts", 1, 20);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_types.push(UnusedExport {
            path: PathBuf::from("src/types.ts"),
            export_name: "OldInterface".to_string(),
            is_type_only: true,
            line: 10,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(result.has_findings());
        // Type matches are tallied in the same counter as export matches.
        assert_eq!(result.clones_with_unused_exports, 1);
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedType { type_name } if type_name == "OldInterface"
        ));
    }

    #[test]
    fn unused_export_wins_over_unused_type_in_same_range() {
        // When both an unused export and an unused type fall inside the clone,
        // exports are searched first and only one finding is produced.
        let duplication = make_pair_report("src/a.ts", "src/b.ts", 1, 20);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_types.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "SomeType".to_string(),
            is_type_only: true,
            line: 3,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "someFn".to_string(),
            is_type_only: false,
            line: 12,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert_eq!(result.total(), 1);
        assert!(matches!(
            &result.combined_findings[0].dead_code_kind,
            DeadCodeKind::UnusedExport { export_name } if export_name == "someFn"
        ));
    }

    #[test]
    fn empty_result_methods() {
        let result = CrossReferenceResult {
            combined_findings: vec![],
            clones_in_unused_files: 0,
            clones_with_unused_exports: 0,
        };
        assert_eq!(result.total(), 0);
        assert!(!result.has_findings());
        assert!(result.affected_group_indices().is_empty());
    }

    #[test]
    fn multiple_groups_with_findings() {
        let duplication = make_report(vec![
            make_group(vec![
                make_instance("src/a.ts", 1, 10),
                make_instance("src/b.ts", 1, 10),
            ]),
            make_group(vec![
                make_instance("src/c.ts", 5, 15),
                make_instance("src/d.ts", 5, 15),
            ]),
            make_group(vec![
                make_instance("src/e.ts", 1, 10),
                make_instance("src/f.ts", 1, 10),
            ]),
        ]);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_files.push(UnusedFile {
            path: PathBuf::from("src/a.ts"),
        });
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/c.ts"),
            export_name: "helper".to_string(),
            is_type_only: false,
            line: 10,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert_eq!(result.total(), 2);
        assert_eq!(result.clones_in_unused_files, 1);
        assert_eq!(result.clones_with_unused_exports, 1);

        let affected = result.affected_group_indices();
        assert!(affected.contains(&0)); // Group 0 has clone in unused file
        assert!(affected.contains(&1)); // Group 1 has clone overlapping unused export
        assert!(!affected.contains(&2)); // Group 2 unaffected
    }

    #[test]
    fn clone_instance_outside_export_line_range() {
        // Clone instance at lines 1-5, unused export at line 10
        // They don't overlap, so no finding
        let duplication = make_pair_report("src/a.ts", "src/b.ts", 1, 5);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/a.ts"),
            export_name: "fn".to_string(),
            is_type_only: false,
            line: 10,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }

    #[test]
    fn clone_in_different_file_than_unused_export() {
        // Clone is in src/a.ts, unused export is in src/x.ts
        let duplication = make_pair_report("src/a.ts", "src/b.ts", 5, 15);
        let mut dead_code = AnalysisResults::default();
        dead_code.unused_exports.push(UnusedExport {
            path: PathBuf::from("src/x.ts"), // different file
            export_name: "fn".to_string(),
            is_type_only: false,
            line: 10,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let result = cross_reference(&duplication, &dead_code);
        assert!(!result.has_findings());
    }
}