Skip to main content

fallow_api/runtime/
duplication.rs

1use std::time::Instant;
2
3use fallow_config::{DetectionMode, DuplicatesConfig};
4use fallow_engine::{AnalysisSession, ProjectConfig};
5use fallow_output::{
6    DupesNextStepsInput, DupesOutput, DupesOutputInput, build_dupes_next_steps, build_dupes_output,
7    dupes_meta,
8};
9use fallow_types::output_format::OutputFormat;
10use rustc_hash::FxHashSet;
11
12use crate::{
13    DupesReportPayload, DuplicationGroup, DuplicationMode, DuplicationOptions,
14    DuplicationProgrammaticOutput, ProgrammaticError,
15    analysis_context::{
16        ProgrammaticAnalysisContext, changed_files_for_run, resolve_programmatic_analysis_context,
17    },
18    duplication_filters::{apply_top, filter_by_diff, filter_by_workspaces},
19    next_steps::{setup_pointer_applicable, suggestions_enabled},
20};
21
22use super::{ProgrammaticResult, root_envelope_mode};
23
/// Schema version supplied as `schema_version` when the duplication output is built
/// in this module; visible to the parent module.
24pub(super) const SCHEMA_VERSION: u32 = 1;
25
26/// Run duplication analysis and return typed API output before serialization.
27///
28/// # Errors
29///
30/// Returns a structured programmatic error for invalid options, config load
31/// failures, or git changed-file failures.
32pub fn run_duplication(
33    options: &DuplicationOptions,
34) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
35    let resolved = resolve_programmatic_analysis_context(&options.analysis)?;
36    resolved.install(|| run_duplication_inner(options, &resolved))
37}
38
39fn run_duplication_inner(
40    options: &DuplicationOptions,
41    resolved: &ProgrammaticAnalysisContext,
42) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
43    let start = Instant::now();
44    let session = load_duplication_session(options, resolved)?;
45    run_duplication_with_session(options, resolved, &session, None, start)
46}
47
48pub(super) fn run_duplication_with_session(
49    options: &DuplicationOptions,
50    resolved: &ProgrammaticAnalysisContext,
51    session: &AnalysisSession,
52    changed_files: Option<&FxHashSet<std::path::PathBuf>>,
53    start: Instant,
54) -> ProgrammaticResult<DuplicationProgrammaticOutput> {
55    let dupes_config = build_dupes_config(options, &session.config().duplicates);
56    let resolved_changed_files = if changed_files.is_some() {
57        None
58    } else {
59        changed_files_for_run(resolved)?
60    };
61    let cache_dir = (!resolved.no_cache).then_some(session.config().cache_dir.as_path());
62    let mut report = if let Some(changed_files) = changed_files.or(resolved_changed_files.as_ref())
63    {
64        let changed_files = changed_files.iter().cloned().collect::<Vec<_>>();
65        session
66            .find_duplicates_touching_files_with_defaults(&dupes_config, &changed_files, cache_dir)
67            .report
68    } else {
69        session
70            .find_duplicates_with_defaults(&dupes_config, cache_dir)
71            .report
72    };
73
74    if let Some(diff) = resolved.diff.as_ref() {
75        filter_by_diff(&mut report, diff, session.root());
76    }
77    if let Some(workspace_roots) = resolved.workspace_roots.as_ref() {
78        filter_by_workspaces(&mut report, workspace_roots, session.root());
79    }
80    if let Some(top) = options.top {
81        apply_top(&mut report, top, session.root());
82    }
83
84    let root = session.root();
85    let payload = DupesReportPayload::from_report(&report);
86    let clone_fingerprints = payload
87        .clone_groups
88        .iter()
89        .map(|group| group.fingerprint.as_str())
90        .collect::<Vec<_>>();
91    let next_steps = build_dupes_next_steps(DupesNextStepsInput {
92        suggestions_enabled: suggestions_enabled(),
93        clone_fingerprints: &clone_fingerprints,
94        offer_setup: setup_pointer_applicable(root),
95        impact_digest: None,
96        audit_changed: fallow_engine::is_git_repo(root),
97    });
98    let output: DupesOutput<DupesReportPayload, DuplicationGroup> =
99        build_dupes_output(DupesOutputInput {
100            schema_version: SCHEMA_VERSION,
101            version: env!("CARGO_PKG_VERSION").to_string(),
102            elapsed: start.elapsed(),
103            report: payload,
104            grouped_by: None,
105            total_issues: None,
106            groups: None,
107            meta: resolved.explain_enabled().then(dupes_meta),
108            workspace_diagnostics: session.workspace_diagnostics().to_vec(),
109            next_steps,
110        });
111    Ok(DuplicationProgrammaticOutput {
112        output,
113        root: session.root().to_path_buf(),
114        threshold: dupes_config.threshold,
115        envelope_mode: root_envelope_mode(),
116        telemetry_analysis_run_id: None,
117    })
118}
119
120pub(super) fn load_duplication_session(
121    options: &DuplicationOptions,
122    resolved: &ProgrammaticAnalysisContext,
123) -> ProgrammaticResult<AnalysisSession> {
124    let project_config =
125        fallow_engine::config_for_project(&resolved.root, resolved.config_path.as_deref())
126            .map_err(|err| {
127                ProgrammaticError::new(format!("failed to load config: {err}"), 2)
128                    .with_code("FALLOW_CONFIG_LOAD_FAILED")
129                    .with_context("analysis.configPath")
130            })?;
131    let project_config = configure_project_for_duplication(project_config, options, resolved);
132    Ok(AnalysisSession::from_config(project_config))
133}
134
135fn configure_project_for_duplication(
136    mut project_config: ProjectConfig,
137    options: &DuplicationOptions,
138    resolved: &ProgrammaticAnalysisContext,
139) -> ProjectConfig {
140    let production = resolved
141        .production_override
142        .unwrap_or(project_config.config.production);
143    project_config.config.production = production;
144    project_config.config.output = OutputFormat::Json;
145    project_config.config.threads = resolved.threads;
146    project_config.config.no_cache = resolved.no_cache;
147    project_config.config.duplicates =
148        build_dupes_config(options, &project_config.config.duplicates);
149    project_config
150}
151
152pub(super) fn build_dupes_config(
153    options: &DuplicationOptions,
154    config: &DuplicatesConfig,
155) -> DuplicatesConfig {
156    DuplicatesConfig {
157        enabled: true,
158        mode: options.mode.map_or(config.mode, duplication_mode_to_config),
159        min_tokens: options.min_tokens.unwrap_or(config.min_tokens),
160        min_lines: options.min_lines.unwrap_or(config.min_lines),
161        min_occurrences: options.min_occurrences.unwrap_or(config.min_occurrences),
162        threshold: options.threshold.unwrap_or(config.threshold),
163        ignore: config.ignore.clone(),
164        ignore_defaults: config.ignore_defaults,
165        skip_local: options.skip_local.unwrap_or(config.skip_local),
166        cross_language: options.cross_language.unwrap_or(config.cross_language),
167        ignore_imports: options.ignore_imports.unwrap_or(config.ignore_imports),
168        normalization: config.normalization.clone(),
169        min_corpus_size_for_shingle_filter: config.min_corpus_size_for_shingle_filter,
170        min_corpus_size_for_token_cache: config.min_corpus_size_for_token_cache,
171    }
172}
173
174const fn duplication_mode_to_config(mode: DuplicationMode) -> DetectionMode {
175    match mode {
176        DuplicationMode::Strict => DetectionMode::Strict,
177        DuplicationMode::Mild => DetectionMode::Mild,
178        DuplicationMode::Weak => DetectionMode::Weak,
179        DuplicationMode::Semantic => DetectionMode::Semantic,
180    }
181}