xvc_pipeline/pipeline/step.rs
1#![allow(clippy::enum_variant_names)]
2
3use clap_complete::ArgValueCompleter;
4use derive_more::Display;
5use xvc_core::util::completer::strum_variants_completer;
6
7use crate::error::{Error, Result};
8use crate::{
9 cmd_step_dependency, cmd_step_new, cmd_step_output, cmd_step_show, cmd_step_update, XvcPipeline,
10};
11use clap::Parser;
12use sad_machine::state_machine;
13use serde::{Deserialize, Serialize};
14use xvc_core::XvcRoot;
15use xvc_core::{persist, XvcEntity};
16use xvc_core::XvcOutputSender;
17
18use super::api::step_list::cmd_step_list;
19use super::api::step_remove::cmd_step_remove;
20use super::util::step_name_completer;
21use super::XvcStepInvalidate;
22
23/// Step creation, dependency, output commands
24#[derive(Debug, Clone, Parser)]
25#[command(name = "step")]
26pub struct StepCLI {
27 /// Step subcommand
28 #[command(subcommand)]
29 pub subcommand: StepSubCommand,
30}
31
32/// Step management subcommands
33#[derive(Debug, Clone, Parser)]
34#[command()]
35// This is just a command description and used once
36#[allow(clippy::large_enum_variant)]
37pub enum StepSubCommand {
38 /// List steps in a pipeline
39 #[command(visible_aliases=&["l"])]
40 List {
41 /// Show only the names, otherwise print commands as well.
42 #[arg(long)]
43 names_only: bool,
44 },
45
46 /// Add a new step
47 #[command(visible_aliases=&["n"])]
48 New {
49 /// Name of the new step
50 #[arg(long, short, add = ArgValueCompleter::new(step_name_completer))]
51 step_name: String,
52
53 /// Step command to run
54 #[arg(long, short, value_hint = clap::ValueHint::CommandString)]
55 command: String,
56
57 /// When to run the command. One of always, never, by_dependencies (default).
58 /// This is used to freeze or invalidate a step manually.
59 #[arg(long, add = ArgValueCompleter::new(strum_variants_completer::<XvcStepInvalidate>))]
60 when: Option<XvcStepInvalidate>,
61 },
62
63 /// Remove a step from a pipeline
64 #[command(visible_aliases=&["R"])]
65 Remove {
66 /// Name of the step to remove
67 #[arg(long, short,add = ArgValueCompleter::new(step_name_completer))]
68 step_name: String,
69 },
70
71 /// Update a step's command or when options.
72 #[command(visible_aliases=&["U"])]
73 Update {
74 /// Name of the step to update. The step should already be defined.
75 #[arg(long, short, add = ArgValueCompleter::new(step_name_completer))]
76 step_name: String,
77
78 /// Step command to run
79 #[arg(long, short, value_hint = clap::ValueHint::CommandString)]
80 command: Option<String>,
81
82 /// When to run the command. One of always, never, by_dependencies (default).
83 /// This is used to freeze or invalidate a step manually.
84 #[arg(long, add = ArgValueCompleter::new(strum_variants_completer::<XvcStepInvalidate>))]
85 when: Option<XvcStepInvalidate>,
86 },
87
88 /// Add a dependency to a step
89 #[command(visible_aliases=&["d"])]
90 Dependency {
91 /// Name of the step to add the dependency to
92 #[arg(long, short, visible_aliases= &["for", "to"],add = ArgValueCompleter::new(step_name_completer) )]
93 step_name: String,
94
95 /// Add a generic command output as a dependency. Can be used multiple times.
96 /// Please delimit the command with ' ' to avoid shell expansion.
97 #[arg(long = "generic", short = 'G')]
98 generics: Option<Vec<String>>,
99
100 /// Add a URL dependency to the step. Can be used multiple times.
101 #[arg(long = "url", short)]
102 urls: Option<Vec<String>>,
103
104 /// Add a file dependency to the step. Can be used multiple times.
105 #[arg(long = "file", short, value_hint = clap::ValueHint::FilePath)]
106 files: Option<Vec<String>>,
107
108 /// Add a step dependency to a step. Can be used multiple times.
109 /// Steps are referred with their names.
110 #[arg(long = "step", short = 'S',add = ArgValueCompleter::new(step_name_completer))]
111 steps: Option<Vec<String>>,
112
113 /// Add a glob items dependency to the step.
114 ///
115 /// You can depend on multiple files and directories with this dependency.
116 ///
117 /// The difference between this and the glob option is that this option keeps track of all
118 /// matching files, but glob only keeps track of the matched files' digest. When you want
119 /// to use ${XVC_GLOB_ITEMS}, ${XVC_ADDED_GLOB_ITEMS}, or ${XVC_REMOVED_GLOB_ITEMS}
120 /// environment variables in the step command, use the glob-items dependency. Otherwise,
121 /// you can use the glob option to save disk space.
122 #[arg(long = "glob_items", visible_aliases=&["glob-items", "glob-i"])]
123 glob_items: Option<Vec<String>>,
124
125 /// Add a glob dependency to the step. Can be used multiple times.
126 ///
127 /// You can depend on multiple files and directories with this dependency.
128 ///
129 /// The difference between this and the glob-items option is that the glob-items option
130 /// keeps track of all matching files individually, but this option only keeps track of the
131 /// matched files' digest. This dependency uses considerably less disk space.
132 #[arg(long = "glob", aliases=&["globs"])]
133 globs: Option<Vec<String>>,
134
135 /// Add a parameter dependency to the step in the form filename.yaml::model.units
136 ///
137 /// The file can be a JSON, TOML, or YAML file. You can specify hierarchical keys like
138 /// my.dict.key
139 ///
140 /// TODO: Add a pipeline_step_params completer
141 #[arg(long = "param", aliases = &["params"])]
142 params: Option<Vec<String>>,
143
144 /// Add a regex dependency in the form filename.txt:/^regex/ . Can be used multiple times.
145 ///
146 /// The difference between this and the regex option is that the regex-items option keeps
147 /// track of all matching lines, but regex only keeps track of the matched lines' digest.
148 /// When you want to use ${XVC_REGEX_ITEMS}, ${XVC_ADDED_REGEX_ITEMS},
149 /// ${XVC_REMOVED_REGEX_ITEMS} environment variables in the step command, use the regex
150 /// option. Otherwise, you can use the regex-digest option to save disk space.
151 #[arg(
152 long = "regex_items",
153 aliases = &["regex-items", "regexp_items", "regexp-items"],
154 )]
155 regex_items: Option<Vec<String>>,
156
157 /// Add a regex dependency in the form filename.txt:/^regex/ . Can be used multiple times.
158 ///
159 /// The difference between this and the regex option is that the regex option keeps track
160 /// of all matching lines that can be used in the step command. This option only keeps
161 /// track of the matched lines' digest.
162 #[arg(
163 long = "regex",
164 aliases = &["regexp"],
165 )]
166 regexes: Option<Vec<String>>,
167
168 /// Add a line dependency in the form filename.txt::123-234
169 ///
170 /// The difference between this and the lines option is that the line-items option keeps
171 /// track of all matching lines that can be used in the step command. This option only
172 /// keeps track of the matched lines' digest. When you want to use ${XVC_ALL_LINE_ITEMS},
173 /// ${XVC_ADDED_LINE_ITEMS}, ${XVC_CHANGED_LINE_ITEMS} options in the step command, use the
174 /// line option. Otherwise, you can use the lines option to save disk space.
175 #[arg(
176 long = "line_items",
177 aliases = &["line-items", "line-i"],
178 )]
179 line_items: Option<Vec<String>>,
180
181 /// Add a line digest dependency in the form filename.txt::123-234
182 ///
183 /// The difference between this and the line-items dependency is that the line option keeps
184 /// track of all matching lines that can be used in the step command. This option only
185 /// keeps track of the matched lines' digest. If you don't need individual lines to be
186 /// kept, use this option to save space.
187 #[arg(
188 long = "lines",
189 aliases = &["line"],
190 )]
191 lines: Option<Vec<String>>,
192
193 /// Add a sqlite query dependency to the step with the file and the query. Can be used
194 /// once.
195 ///
196 /// The step is invalidated when the query run and the result is different from previous
197 /// runs, e.g. when an aggregate changed or a new row added to a table.
198 #[arg(
199 long = "sqlite-query",
200 aliases = &["sqlite_query", "sqlite_query_digest", "sqlite-query-digest"],
201 num_args = 2,
202 value_names = &["SQLITE_FILE", "SQLITE_QUERY"],
203 )]
204 sqlite_query: Option<Vec<String>>,
205 },
206
207 /// Add an output to a step
208 #[command(visible_aliases=&["o"])]
209 Output {
210 /// Name of the step to add the output to
211 #[arg(long, short, add = ArgValueCompleter::new(step_name_completer))]
212 step_name: String,
213
214 /// Add a file output to the step. Can be used multiple times.
215 #[arg(long = "output-file", value_hint = clap::ValueHint::FilePath)]
216 files: Option<Vec<String>>,
217
218 /// Add a metric output to the step. Can be used multiple times.
219 #[arg(long = "output-metric", value_hint = clap::ValueHint::FilePath)]
220 metrics: Option<Vec<String>>,
221
222 /// Add an image output to the step. Can be used multiple times.
223 #[arg(long = "output-image", value_hint = clap::ValueHint::FilePath)]
224 images: Option<Vec<String>>,
225 },
226
227 /// Print step configuration
228 #[command(visible_aliases=&["s"])]
229 Show {
230 /// Name of the step to show
231 #[arg(long, short, add = ArgValueCompleter::new(step_name_completer))]
232 step_name: String,
233 },
234}
235
236/// Dispatch `xvc pipeline step` subcommands.
237pub fn handle_step_cli(
238 output_snd: &XvcOutputSender,
239 xvc_root: &XvcRoot,
240 pipeline_name: &str,
241 command: StepCLI,
242) -> Result<()> {
243 match command.subcommand {
244 StepSubCommand::List {
245 names_only: only_names,
246 } => cmd_step_list(output_snd, xvc_root, pipeline_name, only_names),
247
248 StepSubCommand::New {
249 step_name,
250 command,
251 when: changed,
252 } => cmd_step_new(xvc_root, pipeline_name, step_name, command, changed),
253
254 StepSubCommand::Remove { step_name } => {
255 cmd_step_remove(output_snd, xvc_root, pipeline_name, step_name)
256 }
257
258 StepSubCommand::Update {
259 step_name,
260 command,
261 when: changed,
262 } => cmd_step_update(xvc_root, pipeline_name, step_name, command, changed),
263
264 dep_opts @ StepSubCommand::Dependency { .. } => {
265 cmd_step_dependency(output_snd, xvc_root, pipeline_name, dep_opts)
266 }
267
268 StepSubCommand::Output {
269 step_name,
270 files,
271 metrics,
272 images,
273 } => cmd_step_output(xvc_root, pipeline_name, step_name, files, metrics, images),
274 StepSubCommand::Show { step_name } => cmd_step_show(xvc_root, pipeline_name, step_name),
275 }
276}
277
278/// A step (stage) in a pipeline.
279#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize, Ord, PartialOrd, Display)]
280pub struct XvcStep {
281 /// Name of the step
282 pub name: String,
283}
284
285persist!(XvcStep, "xvc-step");
286
287impl XvcStep {
288 /// Search for a step with the given name in the given pipeline.
289 pub fn from_name(
290 xvc_root: &XvcRoot,
291 pipeline_e: &XvcEntity,
292 step_name: &str,
293 ) -> Result<(XvcEntity, Self)> {
294 let step = XvcStep {
295 name: step_name.to_string(),
296 };
297
298 let pipeline_step_store = xvc_root.load_r1nstore::<XvcPipeline, XvcStep>()?;
299 let pipeline_steps = pipeline_step_store.children_of(pipeline_e)?;
300 match pipeline_steps.entity_by_value(&step) {
301 Some(step_e) => Ok((step_e, step)),
302 None => Err(Error::StepNotFoundInPipeline {
303 step: step_name.to_string(),
304 }),
305 }
306 }
307
308 /// Search for a step with the given entity in the given pipeline.
309 pub fn from_entity(
310 xvc_root: &XvcRoot,
311 pipeline_e: &XvcEntity,
312 step_e: &XvcEntity,
313 ) -> Result<(XvcEntity, Self)> {
314 let pipeline_step_store = xvc_root.load_r1nstore::<XvcPipeline, XvcStep>()?;
315 let pipeline_steps = pipeline_step_store.children_of(pipeline_e)?;
316 match pipeline_steps.get(step_e) {
317 Some(step) => Ok((*step_e, step.clone())),
318 None => Err(Error::StepNotFoundInPipeline {
319 step: format!("Step with entity {}", step_e),
320 }),
321 }
322 }
323}
324
// TODO: Link to the Documentation after it's written: https://github.com/iesahin/xvc/issues/202
//
// State diagram of `XvcStepState`, kept in sync with the `state_machine!` definition below.
//
// ```mermaid
// stateDiagram-v2
//     [*] --> Begin
//     Begin --> DoneWithoutRunning: RunNever
//     Begin --> WaitingDependencySteps: RunConditional
//     WaitingDependencySteps --> WaitingDependencySteps: DependencyStepsRunning
//     WaitingDependencySteps --> Broken: DependencyStepsFinishedBroken
//     WaitingDependencySteps --> CheckingOutputs: DependencyStepsFinishedBrokenIgnored
//     WaitingDependencySteps --> CheckingOutputs: DependencyStepsFinishedSuccessfully
//     CheckingOutputs --> CheckingSuperficialDiffs: OutputsIgnored
//     CheckingOutputs --> CheckingSuperficialDiffs: CheckedOutputs
//     CheckingSuperficialDiffs --> CheckingThoroughDiffs: SuperficialDiffsIgnored
//     CheckingSuperficialDiffs --> ComparingDiffsAndOutputs: SuperficialDiffsNotChanged
//     CheckingSuperficialDiffs --> CheckingThoroughDiffs: SuperficialDiffsChanged
//     CheckingSuperficialDiffs --> Broken: HasMissingDependencies
//     CheckingThoroughDiffs --> ComparingDiffsAndOutputs: ThoroughDiffsNotChanged
//     CheckingThoroughDiffs --> ComparingDiffsAndOutputs: ThoroughDiffsChanged
//     ComparingDiffsAndOutputs --> WaitingToRun: RunAlways
//     ComparingDiffsAndOutputs --> WaitingToRun: DiffsHasChanged
//     ComparingDiffsAndOutputs --> DoneWithoutRunning: DiffsHasNotChanged
//     WaitingToRun --> WaitingToRun: ProcessPoolFull
//     WaitingToRun --> Running: StartProcess
//     WaitingToRun --> Broken: CannotStartProcess
//     Running --> Running: WaitProcess
//     Running --> Broken: ProcessTimeout
//     Running --> DoneByRunning: ProcessCompletedSuccessfully
//     Running --> Broken: ProcessReturnedNonZero
//     Broken --> Broken: KeepBroken
//     DoneByRunning --> DoneByRunning: KeepDone
//     DoneWithoutRunning --> DoneWithoutRunning: KeepDone
//     Broken --> [*]
//     DoneByRunning --> [*]
//     DoneWithoutRunning --> [*]
// ```
358
359state_machine! {
360 XvcStepState {
361 InitialStates { Begin }
362
363 RunNever {
364 Begin => DoneWithoutRunning
365 }
366
367 RunConditional {
368 Begin => WaitingDependencySteps
369 }
370
371 DependencyStepsFinishedBrokenIgnored {
372 WaitingDependencySteps => CheckingOutputs
373 }
374
375
376 DependencyStepsRunning {
377 WaitingDependencySteps => WaitingDependencySteps
378 }
379
380 DependencyStepsFinishedSuccessfully {
381 WaitingDependencySteps => CheckingOutputs
382 }
383
384 DependencyStepsFinishedBroken {
385 WaitingDependencySteps => Broken
386 }
387
388 OutputsIgnored {
389 CheckingOutputs => CheckingSuperficialDiffs
390 }
391
392 CheckedOutputs {
393 CheckingOutputs => CheckingSuperficialDiffs
394 }
395
396 SuperficialDiffsIgnored {
397 CheckingSuperficialDiffs => CheckingThoroughDiffs
398 }
399
400 SuperficialDiffsNotChanged {
401 CheckingSuperficialDiffs => ComparingDiffsAndOutputs
402 }
403
404 SuperficialDiffsChanged {
405 CheckingSuperficialDiffs => CheckingThoroughDiffs
406 }
407
408 HasMissingDependencies {
409 CheckingSuperficialDiffs => Broken
410 }
411
412 ThoroughDiffsNotChanged {
413 CheckingThoroughDiffs => ComparingDiffsAndOutputs
414 }
415
416 ThoroughDiffsChanged {
417 CheckingThoroughDiffs => ComparingDiffsAndOutputs
418 }
419
420 RunAlways {
421 ComparingDiffsAndOutputs => WaitingToRun
422 }
423
424 DiffsHasChanged {
425 ComparingDiffsAndOutputs => WaitingToRun
426 }
427
428 DiffsHasNotChanged {
429 ComparingDiffsAndOutputs => DoneWithoutRunning
430 }
431
432 ProcessPoolFull {
433 WaitingToRun => WaitingToRun
434 }
435
436 StartProcess {
437 WaitingToRun => Running
438 }
439
440 CannotStartProcess {
441 WaitingToRun => Broken
442 }
443
444 WaitProcess {
445 Running => Running
446 }
447
448 ProcessTimeout {
449 Running => Broken
450 }
451
452 ProcessCompletedSuccessfully {
453 Running => DoneByRunning
454 }
455
456 ProcessReturnedNonZero {
457 Running => Broken
458 }
459
460 KeepBroken {
461 Broken => Broken
462 }
463
464 KeepDone {
465 DoneByRunning => DoneByRunning
466 }
467
468 KeepDone {
469 DoneWithoutRunning => DoneWithoutRunning
470 }
471 }
472
473}