Skip to main content

fallow_api/runtime/
duplication.rs

1use std::time::Instant;
2
3use fallow_config::{DetectionMode, DuplicatesConfig};
4use fallow_engine::{project_config::ProjectConfig, session::AnalysisSession};
5use fallow_output::{
6    DupesNextStepsInput, DupesOutput, DupesOutputInput, build_dupes_next_steps, build_dupes_output,
7    dupes_meta,
8};
9use fallow_types::output_format::OutputFormat;
10use rustc_hash::FxHashSet;
11
12use crate::{
13    DupesReportPayload, DuplicationGroup, DuplicationMode, DuplicationOptions,
14    DuplicationProgrammaticOutput, ProgrammaticError,
15    analysis_context::{
16        ProgrammaticAnalysisContext, changed_files_for_run,
17        resolve_programmatic_analysis_context_deferred_workspace, workspace_roots_for_session,
18    },
19    duplication_filters::{apply_top, filter_by_diff, filter_by_workspaces},
20    next_steps::{setup_pointer_applicable, suggestions_enabled},
21};
22
23use super::{ProgrammaticResult, root_envelope_mode};
24
/// Schema version reported in duplication output.
///
/// Passed as `schema_version` when the output envelope is assembled with
/// `build_dupes_output`; exposed to the parent module via `pub(super)`.
pub(super) const SCHEMA_VERSION: u32 = 1;
26
27/// Run duplication analysis and return typed API output before serialization.
28///
29/// # Errors
30///
31/// Returns a structured programmatic error for invalid options, config load
32/// failures, or git changed-file failures.
33pub fn run_duplication(
34    options: &DuplicationOptions,
35) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
36    let resolved = resolve_programmatic_analysis_context_deferred_workspace(&options.analysis)?;
37    resolved.install(|| run_duplication_inner(options, &resolved))
38}
39
40fn run_duplication_inner(
41    options: &DuplicationOptions,
42    resolved: &ProgrammaticAnalysisContext,
43) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
44    let start = Instant::now();
45    let session = load_duplication_session(options, resolved)?;
46    run_duplication_with_session(options, resolved, &session, None, start)
47}
48
49pub(super) fn run_duplication_with_session(
50    options: &DuplicationOptions,
51    resolved: &ProgrammaticAnalysisContext,
52    session: &AnalysisSession,
53    changed_files: Option<&FxHashSet<std::path::PathBuf>>,
54    start: Instant,
55) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
56    let dupes_config = build_dupes_config(options, &session.config().duplicates);
57    let resolved_changed_files = if changed_files.is_some() {
58        None
59    } else {
60        changed_files_for_run(resolved)?
61    };
62    let cache_dir = (!resolved.no_cache).then_some(session.config().cache_dir.as_path());
63    let report = if let Some(changed_files) = changed_files.or(resolved_changed_files.as_ref()) {
64        let changed_files = changed_files.iter().cloned().collect::<Vec<_>>();
65        session
66            .find_duplicates_touching_files_with_defaults(&dupes_config, &changed_files, cache_dir)
67            .report
68    } else {
69        session
70            .find_duplicates_with_defaults(&dupes_config, cache_dir)
71            .report
72    };
73
74    run_duplication_report_with_session(options, resolved, session, report, start)
75}
76
77pub(super) fn run_duplication_report_with_session(
78    options: &DuplicationOptions,
79    resolved: &ProgrammaticAnalysisContext,
80    session: &AnalysisSession,
81    mut report: fallow_engine::duplicates::DuplicationReport,
82    start: Instant,
83) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
84    let dupes_config = build_dupes_config(options, &session.config().duplicates);
85    if let Some(diff) = resolved.diff.as_ref() {
86        filter_by_diff(&mut report, diff, session.root());
87    }
88    let workspace_roots = workspace_roots_for_session(resolved, session.workspaces())?;
89    if let Some(workspace_roots) = workspace_roots.as_ref() {
90        filter_by_workspaces(&mut report, workspace_roots, session.root());
91    }
92    if let Some(top) = options.top {
93        apply_top(&mut report, top, session.root());
94    }
95
96    let root = session.root();
97    let payload = DupesReportPayload::from_report(&report);
98    let clone_fingerprints = payload
99        .clone_groups
100        .iter()
101        .map(|group| group.fingerprint.as_str())
102        .collect::<Vec<_>>();
103    let next_steps = build_dupes_next_steps(DupesNextStepsInput {
104        suggestions_enabled: suggestions_enabled(),
105        clone_fingerprints: &clone_fingerprints,
106        offer_setup: setup_pointer_applicable(root),
107        impact_digest: None,
108        audit_changed: fallow_engine::churn::is_git_repo(root),
109    });
110    let output: DupesOutput<DupesReportPayload, DuplicationGroup> =
111        build_dupes_output(DupesOutputInput {
112            schema_version: SCHEMA_VERSION,
113            version: env!("CARGO_PKG_VERSION").to_string(),
114            elapsed: start.elapsed(),
115            report: payload,
116            grouped_by: None,
117            total_issues: None,
118            groups: None,
119            meta: resolved.explain_enabled().then(dupes_meta),
120            workspace_diagnostics: session.workspace_diagnostics().to_vec(),
121            next_steps,
122        });
123    Ok(DuplicationProgrammaticOutput {
124        output,
125        root: session.root().to_path_buf(),
126        threshold: dupes_config.threshold,
127        envelope_mode: root_envelope_mode(),
128        telemetry_analysis_run_id: None,
129    })
130}
131
132pub(super) fn load_duplication_session(
133    options: &DuplicationOptions,
134    resolved: &ProgrammaticAnalysisContext,
135) -> ProgrammaticResult<AnalysisSession> {
136    let project_config = fallow_engine::project_config::config_for_project(
137        &resolved.root,
138        resolved.config_path.as_deref(),
139    )
140    .map_err(|err| {
141        ProgrammaticError::new(format!("failed to load config: {err}"), 2)
142            .with_code("FALLOW_CONFIG_LOAD_FAILED")
143            .with_context("analysis.configPath")
144    })?;
145    let project_config = configure_project_for_duplication(project_config, options, resolved);
146    Ok(AnalysisSession::from_config(project_config))
147}
148
149fn configure_project_for_duplication(
150    mut project_config: ProjectConfig,
151    options: &DuplicationOptions,
152    resolved: &ProgrammaticAnalysisContext,
153) -> ProjectConfig {
154    let production = resolved
155        .production_override
156        .unwrap_or(project_config.config.production);
157    project_config.config.production = production;
158    project_config.config.output = OutputFormat::Json;
159    project_config.config.threads = resolved.threads;
160    project_config.config.no_cache = resolved.no_cache;
161    project_config.config.duplicates =
162        build_dupes_config(options, &project_config.config.duplicates);
163    project_config
164}
165
166pub(super) fn build_dupes_config(
167    options: &DuplicationOptions,
168    config: &DuplicatesConfig,
169) -> DuplicatesConfig {
170    DuplicatesConfig {
171        enabled: true,
172        mode: options.mode.map_or(config.mode, duplication_mode_to_config),
173        min_tokens: options.min_tokens.unwrap_or(config.min_tokens),
174        min_lines: options.min_lines.unwrap_or(config.min_lines),
175        min_occurrences: options.min_occurrences.unwrap_or(config.min_occurrences),
176        threshold: options.threshold.unwrap_or(config.threshold),
177        ignore: config.ignore.clone(),
178        ignore_defaults: config.ignore_defaults,
179        skip_local: options.skip_local.unwrap_or(config.skip_local),
180        cross_language: options.cross_language.unwrap_or(config.cross_language),
181        ignore_imports: options.ignore_imports.unwrap_or(config.ignore_imports),
182        normalization: config.normalization.clone(),
183        min_corpus_size_for_shingle_filter: config.min_corpus_size_for_shingle_filter,
184        min_corpus_size_for_token_cache: config.min_corpus_size_for_token_cache,
185    }
186}
187
188const fn duplication_mode_to_config(mode: DuplicationMode) -> DetectionMode {
189    match mode {
190        DuplicationMode::Strict => DetectionMode::Strict,
191        DuplicationMode::Mild => DetectionMode::Mild,
192        DuplicationMode::Weak => DetectionMode::Weak,
193        DuplicationMode::Semantic => DetectionMode::Semantic,
194    }
195}