1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
//! Step job creation and configuration.
//!
//! This module is responsible for creating [`StepJob`]s from a step configuration
//! and a list of files. It handles:
//!
//! - File filtering
//! - Workspace splitting (for monorepos)
//! - Batch mode job creation
//! - Skip reason determination
//! - Check-first mode configuration
use crate::Result;
use crate::hook::SkipReason;
use crate::settings::Settings;
use crate::step_job::StepJob;
use indexmap::IndexMap;
use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::Arc;
use super::types::{RunType, Step};
impl Step {
    /// Create step jobs from a list of files.
    ///
    /// This is the main entry point for turning a step plus a candidate file
    /// list into the [`StepJob`]s that will be executed (or reported as
    /// skipped).
    ///
    /// # Job Creation Process
    ///
    /// 1. If the step is listed in `skip_steps`, return one skipped job carrying
    ///    that reason.
    /// 2. If the step has no command for `run_type`, return one skipped job.
    /// 3. If any name in `required` is neither readable via `std::env::var` nor
    ///    a key of the step's own `env`, return one skipped job listing the
    ///    missing names.
    /// 4. Filter `files` through the step's file filters; if nothing is left and
    ///    the step has filters, return one skipped job.
    /// 5. Split the files into workspace jobs (when workspace indicators are
    ///    found) or into batch jobs (when `batch` is set); otherwise create a
    ///    single job with all files.
    /// 6. Unless the step has `stdin` configured, pass the jobs through
    ///    `auto_batch_jobs_if_needed`.
    /// 7. Apply the profile skip reason (only when no `job_condition` is set).
    /// 8. Configure `check_first` on every job.
    ///
    /// # Arguments
    ///
    /// * `files` - All files to consider
    /// * `run_type` - Whether running check or fix
    /// * `files_in_contention` - Files being modified by other steps (for check_first)
    /// * `skip_steps` - Steps explicitly marked to skip, keyed by step name
    ///
    /// # Returns
    ///
    /// The list of jobs for this step. Skipped steps are still represented by a
    /// job whose `skip_reason` is set.
    ///
    /// NOTE(review): in batch mode without workspaces, an empty filtered file
    /// list (possible when the step has no filters) produces no jobs at all,
    /// because chunking an empty list yields nothing — confirm this is intended.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `filter_files` or `workspaces_for_files`.
    pub(crate) fn build_step_jobs(
        &self,
        files: &[PathBuf],
        run_type: RunType,
        files_in_contention: &HashSet<PathBuf>,
        skip_steps: &IndexMap<String, SkipReason>,
    ) -> Result<Vec<StepJob>> {
        // Builds one job for this step; each job receives its own copy of the step.
        let new_job = |job_files: Vec<PathBuf>| {
            StepJob::new(Arc::new(self.clone()), job_files, run_type)
        };
        // Builds the single file-less job used to report that the step is skipped.
        let skipped = |reason: SkipReason| {
            let mut job = new_job(Vec::new());
            job.skip_reason = Some(reason);
            vec![job]
        };

        // Pre-calculate skip reason at the job creation level to simplify run_all_jobs
        if let Some(reason) = skip_steps.get(&self.name) {
            return Ok(skipped(reason.clone()));
        }
        if !self.has_command_for(run_type) {
            return Ok(skipped(SkipReason::NoCommandForRunType(run_type)));
        }

        // A required variable counts as present when it is readable from the
        // process environment or is a key of the step's own `env`.
        let missing: Vec<String> = self
            .required
            .iter()
            .filter(|e| std::env::var(e).is_err() && !self.env.contains_key(*e))
            .cloned()
            .collect();
        if !missing.is_empty() {
            return Ok(skipped(SkipReason::MissingRequiredEnv(missing)));
        }

        let files = self.filter_files(files)?;
        // Skip if no files and step has file filters
        // This means the step was explicitly looking for specific files and found none
        if files.is_empty() && self.has_filters() {
            debug!("{self}: no file matches for step");
            return Ok(skipped(SkipReason::NoFilesToProcess));
        }

        let mut jobs: Vec<StepJob> = if let Some(workspace_indicators) =
            self.workspaces_for_files(&files)?
        {
            // Files not yet claimed by any workspace.
            let mut unclaimed = files.clone();
            // Compute chunk size from total file count so the total number of
            // jobs across all workspaces stays ~jobs_count, not per-workspace.
            let chunk_size = (unclaimed.len() / Settings::get().jobs().get()).max(1);
            workspace_indicators
                // Longest indicator paths go first so that the most specific
                // workspace claims the files under its directory before a
                // shorter (enclosing) path takes whatever is left.
                .sorted_by(|a, b| b.as_os_str().len().cmp(&a.as_os_str().len()))
                .flat_map(|workspace_indicator| {
                    let workspace_dir = workspace_indicator.parent();
                    // Single order-preserving pass: files under the workspace
                    // directory are claimed, the rest stay available for later
                    // workspaces. An indicator without a parent claims everything.
                    let (workspace_files, rest): (Vec<PathBuf>, Vec<PathBuf>) =
                        std::mem::take(&mut unclaimed)
                            .into_iter()
                            .partition(|f| workspace_dir.is_none_or(|dir| f.starts_with(dir)));
                    unclaimed = rest;

                    if self.batch {
                        workspace_files
                            .chunks(chunk_size)
                            .map(|chunk| {
                                new_job(chunk.to_vec())
                                    .with_workspace_indicator(workspace_indicator.clone())
                            })
                            .collect::<Vec<_>>()
                    } else {
                        vec![new_job(workspace_files).with_workspace_indicator(workspace_indicator)]
                    }
                })
                .collect()
        } else if self.batch {
            files
                .chunks((files.len() / Settings::get().jobs().get()).max(1))
                .map(|chunk| new_job(chunk.to_vec()))
                .collect()
        } else {
            vec![new_job(files.clone())]
        };

        if self.stdin.is_none() {
            // Auto-batch any jobs where the file list would exceed safe limits
            jobs = self.auto_batch_jobs_if_needed(jobs);
        }

        // Apply profile skip only after determining files/no-files, so NoFilesToProcess wins
        // Also, if a condition is present, defer profile checks to run() so ConditionFalse wins
        if self.job_condition.is_none()
            && let Some(reason) = self.profile_skip_reason()
        {
            for job in &mut jobs {
                job.skip_reason = Some(reason.clone());
            }
        }

        // If stage=<JOB_FILES> and check_list_files or check_diff is defined, always run check_first
        // to ensure files are filtered correctly, even when there's no contention
        let needs_filtering_for_stage = self
            .stage
            .as_ref()
            .is_some_and(|v| v.len() == 1 && v[0] == "<JOB_FILES>")
            && (self.check_list_files.is_some() || self.check_diff.is_some());
        // In Fix mode, run check_first when check_diff is defined so we can apply the diff directly.
        // In Check mode, this is avoided as check_diff may hide non-auto-fixable errors.
        let can_apply_diff = self.check_diff.is_some() && matches!(run_type, RunType::Fix);
        // Same decision for every job, so compute it once.
        let force_check_first = needs_filtering_for_stage || can_apply_diff;

        for job in &mut jobs {
            if force_check_first {
                job.check_first = true;
            } else if job.check_first {
                // Only adjust check_first for jobs where it was already enabled from config.
                // Default behavior: keep it only if any of the job's files are in contention.
                job.check_first = job.files.iter().any(|f| files_in_contention.contains(f));
            }
        }

        Ok(jobs)
    }
}