1use std::time::Instant;
2
3use fallow_config::{DetectionMode, DuplicatesConfig};
4use fallow_engine::{project_config::ProjectConfig, session::AnalysisSession};
5use fallow_output::{
6 DupesNextStepsInput, DupesOutput, DupesOutputInput, build_dupes_next_steps, build_dupes_output,
7 dupes_meta,
8};
9use fallow_types::output_format::OutputFormat;
10use rustc_hash::FxHashSet;
11
12use crate::{
13 DupesReportPayload, DuplicationGroup, DuplicationMode, DuplicationOptions,
14 DuplicationProgrammaticOutput, ProgrammaticError,
15 analysis_context::{
16 ProgrammaticAnalysisContext, changed_files_for_run,
17 resolve_programmatic_analysis_context_deferred_workspace, workspace_roots_for_session,
18 },
19 duplication_filters::{apply_top, filter_by_diff, filter_by_workspaces},
20 next_steps::{setup_pointer_applicable, suggestions_enabled},
21};
22
23use super::{ProgrammaticResult, root_envelope_mode};
24
/// Schema version passed as `schema_version` when the duplication output is
/// assembled in this module.
pub(super) const SCHEMA_VERSION: u32 = 1;
26
27pub fn run_duplication(
34 options: &DuplicationOptions,
35) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
36 let resolved = resolve_programmatic_analysis_context_deferred_workspace(&options.analysis)?;
37 resolved.install(|| run_duplication_inner(options, &resolved))
38}
39
40fn run_duplication_inner(
41 options: &DuplicationOptions,
42 resolved: &ProgrammaticAnalysisContext,
43) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
44 let start = Instant::now();
45 let session = load_duplication_session(options, resolved)?;
46 run_duplication_with_session(options, resolved, &session, None, start)
47}
48
49pub(super) fn run_duplication_with_session(
50 options: &DuplicationOptions,
51 resolved: &ProgrammaticAnalysisContext,
52 session: &AnalysisSession,
53 changed_files: Option<&FxHashSet<std::path::PathBuf>>,
54 start: Instant,
55) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
56 let dupes_config = build_dupes_config(options, &session.config().duplicates);
57 let resolved_changed_files = if changed_files.is_some() {
58 None
59 } else {
60 changed_files_for_run(resolved)?
61 };
62 let cache_dir = (!resolved.no_cache).then_some(session.config().cache_dir.as_path());
63 let report = if let Some(changed_files) = changed_files.or(resolved_changed_files.as_ref()) {
64 let changed_files = changed_files.iter().cloned().collect::<Vec<_>>();
65 session
66 .find_duplicates_touching_files_with_defaults(&dupes_config, &changed_files, cache_dir)
67 .report
68 } else {
69 session
70 .find_duplicates_with_defaults(&dupes_config, cache_dir)
71 .report
72 };
73
74 run_duplication_report_with_session(options, resolved, session, report, start)
75}
76
77pub(super) fn run_duplication_report_with_session(
78 options: &DuplicationOptions,
79 resolved: &ProgrammaticAnalysisContext,
80 session: &AnalysisSession,
81 mut report: fallow_engine::duplicates::DuplicationReport,
82 start: Instant,
83) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
84 let dupes_config = build_dupes_config(options, &session.config().duplicates);
85 if let Some(diff) = resolved.diff.as_ref() {
86 filter_by_diff(&mut report, diff, session.root());
87 }
88 let workspace_roots = workspace_roots_for_session(resolved, session.workspaces())?;
89 if let Some(workspace_roots) = workspace_roots.as_ref() {
90 filter_by_workspaces(&mut report, workspace_roots, session.root());
91 }
92 if let Some(top) = options.top {
93 apply_top(&mut report, top, session.root());
94 }
95
96 let root = session.root();
97 let payload = DupesReportPayload::from_report(&report);
98 let clone_fingerprints = payload
99 .clone_groups
100 .iter()
101 .map(|group| group.fingerprint.as_str())
102 .collect::<Vec<_>>();
103 let next_steps = build_dupes_next_steps(DupesNextStepsInput {
104 suggestions_enabled: suggestions_enabled(),
105 clone_fingerprints: &clone_fingerprints,
106 offer_setup: setup_pointer_applicable(root),
107 impact_digest: None,
108 audit_changed: fallow_engine::churn::is_git_repo(root),
109 });
110 let output: DupesOutput<DupesReportPayload, DuplicationGroup> =
111 build_dupes_output(DupesOutputInput {
112 schema_version: SCHEMA_VERSION,
113 version: env!("CARGO_PKG_VERSION").to_string(),
114 elapsed: start.elapsed(),
115 report: payload,
116 grouped_by: None,
117 total_issues: None,
118 groups: None,
119 meta: resolved.explain_enabled().then(dupes_meta),
120 workspace_diagnostics: session.workspace_diagnostics().to_vec(),
121 next_steps,
122 });
123 Ok(DuplicationProgrammaticOutput {
124 output,
125 root: session.root().to_path_buf(),
126 threshold: dupes_config.threshold,
127 envelope_mode: root_envelope_mode(),
128 telemetry_analysis_run_id: None,
129 })
130}
131
132pub(super) fn load_duplication_session(
133 options: &DuplicationOptions,
134 resolved: &ProgrammaticAnalysisContext,
135) -> ProgrammaticResult<AnalysisSession> {
136 let project_config = fallow_engine::project_config::config_for_project(
137 &resolved.root,
138 resolved.config_path.as_deref(),
139 )
140 .map_err(|err| {
141 ProgrammaticError::new(format!("failed to load config: {err}"), 2)
142 .with_code("FALLOW_CONFIG_LOAD_FAILED")
143 .with_context("analysis.configPath")
144 })?;
145 let project_config = configure_project_for_duplication(project_config, options, resolved);
146 Ok(AnalysisSession::from_config(project_config))
147}
148
149fn configure_project_for_duplication(
150 mut project_config: ProjectConfig,
151 options: &DuplicationOptions,
152 resolved: &ProgrammaticAnalysisContext,
153) -> ProjectConfig {
154 let production = resolved
155 .production_override
156 .unwrap_or(project_config.config.production);
157 project_config.config.production = production;
158 project_config.config.output = OutputFormat::Json;
159 project_config.config.threads = resolved.threads;
160 project_config.config.no_cache = resolved.no_cache;
161 project_config.config.duplicates =
162 build_dupes_config(options, &project_config.config.duplicates);
163 project_config
164}
165
166pub(super) fn build_dupes_config(
167 options: &DuplicationOptions,
168 config: &DuplicatesConfig,
169) -> DuplicatesConfig {
170 DuplicatesConfig {
171 enabled: true,
172 mode: options.mode.map_or(config.mode, duplication_mode_to_config),
173 min_tokens: options.min_tokens.unwrap_or(config.min_tokens),
174 min_lines: options.min_lines.unwrap_or(config.min_lines),
175 min_occurrences: options.min_occurrences.unwrap_or(config.min_occurrences),
176 threshold: options.threshold.unwrap_or(config.threshold),
177 ignore: config.ignore.clone(),
178 ignore_defaults: config.ignore_defaults,
179 skip_local: options.skip_local.unwrap_or(config.skip_local),
180 cross_language: options.cross_language.unwrap_or(config.cross_language),
181 ignore_imports: options.ignore_imports.unwrap_or(config.ignore_imports),
182 normalization: config.normalization.clone(),
183 min_corpus_size_for_shingle_filter: config.min_corpus_size_for_shingle_filter,
184 min_corpus_size_for_token_cache: config.min_corpus_size_for_token_cache,
185 }
186}
187
188const fn duplication_mode_to_config(mode: DuplicationMode) -> DetectionMode {
189 match mode {
190 DuplicationMode::Strict => DetectionMode::Strict,
191 DuplicationMode::Mild => DetectionMode::Mild,
192 DuplicationMode::Weak => DetectionMode::Weak,
193 DuplicationMode::Semantic => DetectionMode::Semantic,
194 }
195}