1pub mod analyzer;
2pub mod cli;
3pub mod code_unit;
4pub mod config;
5pub mod error;
6pub mod extractor;
7pub mod fingerprint;
8pub mod grouper;
9pub mod ignore;
10pub mod node;
11pub mod output;
12pub mod scanner;
13pub mod similarity;
14
15use std::collections::HashSet;
16use std::path::PathBuf;
17
18use analyzer::LanguageAnalyzer;
19use code_unit::CodeUnit;
20use config::Config;
21use fingerprint::Fingerprint;
22use grouper::{DuplicateGroup, DuplicationStats};
23
24pub struct AnalysisResult {
26 pub stats: DuplicationStats,
27 pub exact_groups: Vec<DuplicateGroup>,
28 pub near_groups: Vec<DuplicateGroup>,
29 pub sub_exact_groups: Vec<DuplicateGroup>,
30 pub sub_near_groups: Vec<DuplicateGroup>,
31 pub warnings: Vec<String>,
32 pub all_fingerprints: HashSet<Fingerprint>,
35}
36
37pub fn analyze(
42 analyzer: &dyn LanguageAnalyzer,
43 files: &[PathBuf],
44 config: &Config,
45) -> error::Result<AnalysisResult> {
46 let analysis_config = config.analysis_config();
47 let mut units = Vec::new();
48 let mut warnings = Vec::new();
49
50 for path in files {
51 let source = match std::fs::read_to_string(path) {
52 Ok(s) => s,
53 Err(e) => {
54 warnings.push(format!("Failed to read {}: {}", path.display(), e));
55 continue;
56 }
57 };
58 match analyzer.parse_file(path, &source, &analysis_config) {
59 Ok(mut file_units) => {
60 if config.exclude_tests {
61 file_units.retain(|u| !analyzer.is_test_code(u));
62 }
63 units.extend(file_units);
64 }
65 Err(e) => warnings.push(e.to_string()),
66 }
67 }
68
69 analyze_units(&units, warnings, config)
70}
71
72pub fn analyze_units(
77 units: &[CodeUnit],
78 warnings: Vec<String>,
79 config: &Config,
80) -> error::Result<AnalysisResult> {
81 let exact_groups = grouper::group_exact_duplicates(units);
83
84 let exact_fps: Vec<_> = exact_groups.iter().map(|g| g.fingerprint).collect();
86 let near_groups = grouper::find_near_duplicates(units, config.similarity_threshold, &exact_fps);
87
88 let (sub_exact_groups, sub_near_groups) = if config.sub_function {
90 let sub_units: Vec<CodeUnit> = units
92 .iter()
93 .flat_map(|unit| {
94 let sub_units = extractor::extract_sub_units(&unit.body, config.min_sub_nodes);
95 sub_units.into_iter().map(|su| CodeUnit {
96 kind: su.kind,
97 name: su.description,
98 file: unit.file.clone(),
99 line_start: unit.line_start,
100 line_end: unit.line_end,
101 signature: node::NormalizedNode::leaf(node::NodeKind::Opaque),
102 body: su.node.clone(),
103 fingerprint: fingerprint::Fingerprint::from_node(&su.node),
104 node_count: su.node_count,
105 parent_name: Some(unit.name.clone()),
106 is_test: unit.is_test,
107 })
108 })
109 .collect();
110
111 let sub_exact = grouper::group_exact_duplicates(&sub_units);
112 let sub_exact_fps: Vec<_> = sub_exact.iter().map(|g| g.fingerprint).collect();
113 let sub_near =
114 grouper::find_near_duplicates(&sub_units, config.similarity_threshold, &sub_exact_fps);
115 (sub_exact, sub_near)
116 } else {
117 (Vec::new(), Vec::new())
118 };
119
120 let all_fingerprints: HashSet<Fingerprint> = exact_groups
122 .iter()
123 .chain(near_groups.iter())
124 .chain(sub_exact_groups.iter())
125 .chain(sub_near_groups.iter())
126 .map(|g| g.fingerprint)
127 .collect();
128
129 let ignore_file = ignore::load_ignore_file(&config.root);
131 let exact_groups = ignore::filter_ignored(exact_groups, &ignore_file);
132 let near_groups = ignore::filter_ignored(near_groups, &ignore_file);
133 let sub_exact_groups = ignore::filter_ignored(sub_exact_groups, &ignore_file);
134 let sub_near_groups = ignore::filter_ignored(sub_near_groups, &ignore_file);
135
136 let stats = grouper::compute_stats_with_sub(
138 units,
139 &exact_groups,
140 &near_groups,
141 &sub_exact_groups,
142 &sub_near_groups,
143 );
144
145 Ok(AnalysisResult {
146 stats,
147 exact_groups,
148 near_groups,
149 sub_exact_groups,
150 sub_near_groups,
151 warnings,
152 all_fingerprints,
153 })
154}