1use std::time::Instant;
2
3use fallow_config::{DetectionMode, DuplicatesConfig};
4use fallow_engine::{AnalysisSession, ProjectConfig};
5use fallow_output::{
6 DupesNextStepsInput, DupesOutput, DupesOutputInput, build_dupes_next_steps, build_dupes_output,
7 dupes_meta,
8};
9use fallow_types::output_format::OutputFormat;
10use rustc_hash::FxHashSet;
11
12use crate::{
13 DupesReportPayload, DuplicationGroup, DuplicationMode, DuplicationOptions,
14 DuplicationProgrammaticOutput, ProgrammaticError,
15 analysis_context::{
16 ProgrammaticAnalysisContext, changed_files_for_run, resolve_programmatic_analysis_context,
17 },
18 duplication_filters::{apply_top, filter_by_diff, filter_by_workspaces},
19 next_steps::{setup_pointer_applicable, suggestions_enabled},
20};
21
22use super::{ProgrammaticResult, root_envelope_mode};
23
/// Schema version passed as `schema_version` when the duplication output is built.
pub(super) const SCHEMA_VERSION: u32 = 1;
25
26pub fn run_duplication(
33 options: &DuplicationOptions,
34) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
35 let resolved = resolve_programmatic_analysis_context(&options.analysis)?;
36 resolved.install(|| run_duplication_inner(options, &resolved))
37}
38
39fn run_duplication_inner(
40 options: &DuplicationOptions,
41 resolved: &ProgrammaticAnalysisContext,
42) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
43 let start = Instant::now();
44 let session = load_duplication_session(options, resolved)?;
45 run_duplication_with_session(options, resolved, &session, None, start)
46}
47
48pub(super) fn run_duplication_with_session(
49 options: &DuplicationOptions,
50 resolved: &ProgrammaticAnalysisContext,
51 session: &AnalysisSession,
52 changed_files: Option<&FxHashSet<std::path::PathBuf>>,
53 start: Instant,
54) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
55 let dupes_config = build_dupes_config(options, &session.config().duplicates);
56 let resolved_changed_files = if changed_files.is_some() {
57 None
58 } else {
59 changed_files_for_run(resolved)?
60 };
61 let cache_dir = (!resolved.no_cache).then_some(session.config().cache_dir.as_path());
62 let mut report = if let Some(changed_files) = changed_files.or(resolved_changed_files.as_ref())
63 {
64 let changed_files = changed_files.iter().cloned().collect::<Vec<_>>();
65 session
66 .find_duplicates_touching_files_with_defaults(&dupes_config, &changed_files, cache_dir)
67 .report
68 } else {
69 session
70 .find_duplicates_with_defaults(&dupes_config, cache_dir)
71 .report
72 };
73
74 if let Some(diff) = resolved.diff.as_ref() {
75 filter_by_diff(&mut report, diff, session.root());
76 }
77 if let Some(workspace_roots) = resolved.workspace_roots.as_ref() {
78 filter_by_workspaces(&mut report, workspace_roots, session.root());
79 }
80 if let Some(top) = options.top {
81 apply_top(&mut report, top, session.root());
82 }
83
84 let root = session.root();
85 let payload = DupesReportPayload::from_report(&report);
86 let clone_fingerprints = payload
87 .clone_groups
88 .iter()
89 .map(|group| group.fingerprint.as_str())
90 .collect::<Vec<_>>();
91 let next_steps = build_dupes_next_steps(DupesNextStepsInput {
92 suggestions_enabled: suggestions_enabled(),
93 clone_fingerprints: &clone_fingerprints,
94 offer_setup: setup_pointer_applicable(root),
95 impact_digest: None,
96 audit_changed: fallow_engine::is_git_repo(root),
97 });
98 let output: DupesOutput<DupesReportPayload, DuplicationGroup> =
99 build_dupes_output(DupesOutputInput {
100 schema_version: SCHEMA_VERSION,
101 version: env!("CARGO_PKG_VERSION").to_string(),
102 elapsed: start.elapsed(),
103 report: payload,
104 grouped_by: None,
105 total_issues: None,
106 groups: None,
107 meta: resolved.explain_enabled().then(dupes_meta),
108 workspace_diagnostics: session.workspace_diagnostics().to_vec(),
109 next_steps,
110 });
111 Ok(DuplicationProgrammaticOutput {
112 output,
113 root: session.root().to_path_buf(),
114 threshold: dupes_config.threshold,
115 envelope_mode: root_envelope_mode(),
116 telemetry_analysis_run_id: None,
117 })
118}
119
120pub(super) fn load_duplication_session(
121 options: &DuplicationOptions,
122 resolved: &ProgrammaticAnalysisContext,
123) -> ProgrammaticResult<AnalysisSession> {
124 let project_config =
125 fallow_engine::config_for_project(&resolved.root, resolved.config_path.as_deref())
126 .map_err(|err| {
127 ProgrammaticError::new(format!("failed to load config: {err}"), 2)
128 .with_code("FALLOW_CONFIG_LOAD_FAILED")
129 .with_context("analysis.configPath")
130 })?;
131 let project_config = configure_project_for_duplication(project_config, options, resolved);
132 Ok(AnalysisSession::from_config(project_config))
133}
134
135fn configure_project_for_duplication(
136 mut project_config: ProjectConfig,
137 options: &DuplicationOptions,
138 resolved: &ProgrammaticAnalysisContext,
139) -> ProjectConfig {
140 let production = resolved
141 .production_override
142 .unwrap_or(project_config.config.production);
143 project_config.config.production = production;
144 project_config.config.output = OutputFormat::Json;
145 project_config.config.threads = resolved.threads;
146 project_config.config.no_cache = resolved.no_cache;
147 project_config.config.duplicates =
148 build_dupes_config(options, &project_config.config.duplicates);
149 project_config
150}
151
152pub(super) fn build_dupes_config(
153 options: &DuplicationOptions,
154 config: &DuplicatesConfig,
155) -> DuplicatesConfig {
156 DuplicatesConfig {
157 enabled: true,
158 mode: options.mode.map_or(config.mode, duplication_mode_to_config),
159 min_tokens: options.min_tokens.unwrap_or(config.min_tokens),
160 min_lines: options.min_lines.unwrap_or(config.min_lines),
161 min_occurrences: options.min_occurrences.unwrap_or(config.min_occurrences),
162 threshold: options.threshold.unwrap_or(config.threshold),
163 ignore: config.ignore.clone(),
164 ignore_defaults: config.ignore_defaults,
165 skip_local: options.skip_local.unwrap_or(config.skip_local),
166 cross_language: options.cross_language.unwrap_or(config.cross_language),
167 ignore_imports: options.ignore_imports.unwrap_or(config.ignore_imports),
168 normalization: config.normalization.clone(),
169 min_corpus_size_for_shingle_filter: config.min_corpus_size_for_shingle_filter,
170 min_corpus_size_for_token_cache: config.min_corpus_size_for_token_cache,
171 }
172}
173
174const fn duplication_mode_to_config(mode: DuplicationMode) -> DetectionMode {
175 match mode {
176 DuplicationMode::Strict => DetectionMode::Strict,
177 DuplicationMode::Mild => DetectionMode::Mild,
178 DuplicationMode::Weak => DetectionMode::Weak,
179 DuplicationMode::Semantic => DetectionMode::Semantic,
180 }
181}