xvc_pipeline/pipeline/
step.rs

1#![allow(clippy::enum_variant_names)]
2
3use clap_complete::ArgValueCompleter;
4use derive_more::Display;
5use xvc_core::util::completer::strum_variants_completer;
6
7use crate::error::{Error, Result};
8use crate::{
9    cmd_step_dependency, cmd_step_new, cmd_step_output, cmd_step_show, cmd_step_update, XvcPipeline,
10};
11use clap::Parser;
12use sad_machine::state_machine;
13use serde::{Deserialize, Serialize};
14use xvc_core::XvcRoot;
15use xvc_core::{persist, XvcEntity};
16use xvc_core::XvcOutputSender;
17
18use super::api::step_list::cmd_step_list;
19use super::api::step_remove::cmd_step_remove;
20use super::util::step_name_completer;
21use super::XvcStepInvalidate;
22
23/// Step creation, dependency, output commands
24#[derive(Debug, Clone, Parser)]
25#[command(name = "step")]
26pub struct StepCLI {
27    /// Step subcommand
28    #[command(subcommand)]
29    pub subcommand: StepSubCommand,
30}
31
32/// Step management subcommands
33#[derive(Debug, Clone, Parser)]
34#[command()]
35// This is just a command description and used once
36#[allow(clippy::large_enum_variant)]
37pub enum StepSubCommand {
38    /// List steps in a pipeline
39    #[command(visible_aliases=&["l"])]
40    List {
41        /// Show only the names, otherwise print commands as well.
42        #[arg(long)]
43        names_only: bool,
44    },
45
46    /// Add a new step
47    #[command(visible_aliases=&["n"])]
48    New {
49        /// Name of the new step
50        #[arg(long, short, add = ArgValueCompleter::new(step_name_completer))]
51        step_name: String,
52
53        /// Step command to run
54        #[arg(long, short, value_hint = clap::ValueHint::CommandString)]
55        command: String,
56
57        /// When to run the command. One of always, never, by_dependencies (default).
58        /// This is used to freeze or invalidate a step manually.
59        #[arg(long, add = ArgValueCompleter::new(strum_variants_completer::<XvcStepInvalidate>))]
60        when: Option<XvcStepInvalidate>,
61    },
62
63    /// Remove a step from a pipeline
64    #[command(visible_aliases=&["R"])]
65    Remove {
66        /// Name of the step to remove
67        #[arg(long, short,add = ArgValueCompleter::new(step_name_completer))]
68        step_name: String,
69    },
70
71    /// Update a step's command or when options.
72    #[command(visible_aliases=&["U"])]
73    Update {
74        /// Name of the step to update. The step should already be defined.
75        #[arg(long, short, add = ArgValueCompleter::new(step_name_completer))]
76        step_name: String,
77
78        /// Step command to run
79        #[arg(long, short, value_hint = clap::ValueHint::CommandString)]
80        command: Option<String>,
81
82        /// When to run the command. One of always, never, by_dependencies (default).
83        /// This is used to freeze or invalidate a step manually.
84        #[arg(long, add = ArgValueCompleter::new(strum_variants_completer::<XvcStepInvalidate>))]
85        when: Option<XvcStepInvalidate>,
86    },
87
88    /// Add a dependency to a step
89    #[command(visible_aliases=&["d"])]
90    Dependency {
91        /// Name of the step to add the dependency to
92        #[arg(long, short, visible_aliases= &["for", "to"],add = ArgValueCompleter::new(step_name_completer) )]
93        step_name: String,
94
95        /// Add a generic command output as a dependency. Can be used multiple times.
96        /// Please delimit the command with ' ' to avoid shell expansion.
97        #[arg(long = "generic", short = 'G')]
98        generics: Option<Vec<String>>,
99
100        /// Add a URL dependency to the step. Can be used multiple times.
101        #[arg(long = "url", short)]
102        urls: Option<Vec<String>>,
103
104        /// Add a file dependency to the step. Can be used multiple times.
105        #[arg(long = "file", short, value_hint = clap::ValueHint::FilePath)]
106        files: Option<Vec<String>>,
107
108        /// Add a step dependency to a step. Can be used multiple times.
109        /// Steps are referred with their names.
110        #[arg(long = "step", short = 'S',add = ArgValueCompleter::new(step_name_completer))]
111        steps: Option<Vec<String>>,
112
113        /// Add a glob items dependency to the step.
114        ///
115        /// You can depend on multiple files and directories with this dependency.
116        ///
117        /// The difference between this and the glob option is that this option keeps track of all
118        /// matching files, but glob only keeps track of the matched files' digest. When you want
119        /// to use ${XVC_GLOB_ITEMS}, ${XVC_ADDED_GLOB_ITEMS}, or ${XVC_REMOVED_GLOB_ITEMS}
120        /// environment variables in the step command, use the glob-items dependency. Otherwise,
121        /// you can use the glob option to save disk space.
122        #[arg(long = "glob_items", visible_aliases=&["glob-items", "glob-i"])]
123        glob_items: Option<Vec<String>>,
124
125        /// Add a glob dependency to the step. Can be used multiple times.
126        ///
127        /// You can depend on multiple files and directories with this dependency.
128        ///
129        /// The difference between this and the glob-items option is that the glob-items option
130        /// keeps track of all matching files individually, but this option only keeps track of the
131        /// matched files' digest. This dependency uses considerably less disk space.
132        #[arg(long = "glob", aliases=&["globs"])]
133        globs: Option<Vec<String>>,
134
135        /// Add a parameter dependency to the step in the form filename.yaml::model.units
136        ///
137        /// The file can be a JSON, TOML, or YAML file. You can specify hierarchical keys like
138        /// my.dict.key
139        ///
140        /// TODO: Add a pipeline_step_params completer
141        #[arg(long = "param", aliases = &["params"])]
142        params: Option<Vec<String>>,
143
144        /// Add a regex dependency in the form filename.txt:/^regex/ . Can be used multiple times.
145        ///
146        /// The difference between this and the regex option is that the regex-items option keeps
147        /// track of all matching lines, but regex only keeps track of the matched lines' digest.
148        /// When you want to use ${XVC_REGEX_ITEMS}, ${XVC_ADDED_REGEX_ITEMS},
149        /// ${XVC_REMOVED_REGEX_ITEMS} environment variables in the step command, use the regex
150        /// option. Otherwise, you can use the regex-digest option to save disk space.
151        #[arg(
152            long = "regex_items",
153            aliases = &["regex-items", "regexp_items", "regexp-items"],
154        )]
155        regex_items: Option<Vec<String>>,
156
157        /// Add a regex dependency in the form filename.txt:/^regex/ . Can be used multiple times.
158        ///
159        /// The difference between this and the regex option is that the regex option keeps track
160        /// of all matching lines that can be used in the step command. This option only keeps
161        /// track of the matched lines' digest.
162        #[arg(
163            long = "regex",
164            aliases = &["regexp"],
165        )]
166        regexes: Option<Vec<String>>,
167
168        /// Add a line dependency in the form filename.txt::123-234
169        ///
170        /// The difference between this and the lines option is that the line-items option keeps
171        /// track of all matching lines that can be used in the step command. This option only
172        /// keeps track of the matched lines' digest. When you want to use ${XVC_ALL_LINE_ITEMS},
173        /// ${XVC_ADDED_LINE_ITEMS}, ${XVC_CHANGED_LINE_ITEMS} options in the step command, use the
174        /// line option. Otherwise, you can use the lines option to save disk space.
175        #[arg(
176            long = "line_items",
177            aliases = &["line-items", "line-i"],
178        )]
179        line_items: Option<Vec<String>>,
180
181        /// Add a line digest dependency in the form filename.txt::123-234
182        ///
183        /// The difference between this and the line-items dependency is that the line option keeps
184        /// track of all matching lines that can be used in the step command. This option only
185        /// keeps track of the matched lines' digest. If you don't need individual lines to be
186        /// kept, use this option to save space.
187        #[arg(
188            long = "lines",
189            aliases = &["line"],
190        )]
191        lines: Option<Vec<String>>,
192
193        /// Add a sqlite query dependency to the step with the file and the query. Can be used
194        /// once.
195        ///
196        /// The step is invalidated when the query run and the result is different from previous
197        /// runs, e.g. when an aggregate changed or a new row added to a table.
198        #[arg(
199            long = "sqlite-query",
200            aliases = &["sqlite_query", "sqlite_query_digest", "sqlite-query-digest"],
201            num_args = 2,
202            value_names = &["SQLITE_FILE", "SQLITE_QUERY"],
203        )]
204        sqlite_query: Option<Vec<String>>,
205    },
206
207    /// Add an output to a step
208    #[command(visible_aliases=&["o"])]
209    Output {
210        /// Name of the step to add the output to
211        #[arg(long, short, add = ArgValueCompleter::new(step_name_completer))]
212        step_name: String,
213
214        /// Add a file output to the step. Can be used multiple times.
215        #[arg(long = "output-file", value_hint = clap::ValueHint::FilePath)]
216        files: Option<Vec<String>>,
217
218        /// Add a metric output to the step. Can be used multiple times.
219        #[arg(long = "output-metric", value_hint = clap::ValueHint::FilePath)]
220        metrics: Option<Vec<String>>,
221
222        /// Add an image output to the step. Can be used multiple times.
223        #[arg(long = "output-image", value_hint = clap::ValueHint::FilePath)]
224        images: Option<Vec<String>>,
225    },
226
227    /// Print step configuration
228    #[command(visible_aliases=&["s"])]
229    Show {
230        /// Name of the step to show
231        #[arg(long, short, add = ArgValueCompleter::new(step_name_completer))]
232        step_name: String,
233    },
234}
235
236/// Dispatch `xvc pipeline step` subcommands.
237pub fn handle_step_cli(
238    output_snd: &XvcOutputSender,
239    xvc_root: &XvcRoot,
240    pipeline_name: &str,
241    command: StepCLI,
242) -> Result<()> {
243    match command.subcommand {
244        StepSubCommand::List {
245            names_only: only_names,
246        } => cmd_step_list(output_snd, xvc_root, pipeline_name, only_names),
247
248        StepSubCommand::New {
249            step_name,
250            command,
251            when: changed,
252        } => cmd_step_new(xvc_root, pipeline_name, step_name, command, changed),
253
254        StepSubCommand::Remove { step_name } => {
255            cmd_step_remove(output_snd, xvc_root, pipeline_name, step_name)
256        }
257
258        StepSubCommand::Update {
259            step_name,
260            command,
261            when: changed,
262        } => cmd_step_update(xvc_root, pipeline_name, step_name, command, changed),
263
264        dep_opts @ StepSubCommand::Dependency { .. } => {
265            cmd_step_dependency(output_snd, xvc_root, pipeline_name, dep_opts)
266        }
267
268        StepSubCommand::Output {
269            step_name,
270            files,
271            metrics,
272            images,
273        } => cmd_step_output(xvc_root, pipeline_name, step_name, files, metrics, images),
274        StepSubCommand::Show { step_name } => cmd_step_show(xvc_root, pipeline_name, step_name),
275    }
276}
277
278/// A step (stage) in a pipeline.
279#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize, Ord, PartialOrd, Display)]
280pub struct XvcStep {
281    /// Name of the step
282    pub name: String,
283}
284
285persist!(XvcStep, "xvc-step");
286
287impl XvcStep {
288    /// Search for a step with the given name in the given pipeline.
289    pub fn from_name(
290        xvc_root: &XvcRoot,
291        pipeline_e: &XvcEntity,
292        step_name: &str,
293    ) -> Result<(XvcEntity, Self)> {
294        let step = XvcStep {
295            name: step_name.to_string(),
296        };
297
298        let pipeline_step_store = xvc_root.load_r1nstore::<XvcPipeline, XvcStep>()?;
299        let pipeline_steps = pipeline_step_store.children_of(pipeline_e)?;
300        match pipeline_steps.entity_by_value(&step) {
301            Some(step_e) => Ok((step_e, step)),
302            None => Err(Error::StepNotFoundInPipeline {
303                step: step_name.to_string(),
304            }),
305        }
306    }
307
308    /// Search for a step with the given entity in the given pipeline.
309    pub fn from_entity(
310        xvc_root: &XvcRoot,
311        pipeline_e: &XvcEntity,
312        step_e: &XvcEntity,
313    ) -> Result<(XvcEntity, Self)> {
314        let pipeline_step_store = xvc_root.load_r1nstore::<XvcPipeline, XvcStep>()?;
315        let pipeline_steps = pipeline_step_store.children_of(pipeline_e)?;
316        match pipeline_steps.get(step_e) {
317            Some(step) => Ok((*step_e, step.clone())),
318            None => Err(Error::StepNotFoundInPipeline {
319                step: format!("Step with entity {}", step_e),
320            }),
321        }
322    }
323}
324
// TODO: Link to the Documentation after it's written: https://github.com/iesahin/xvc/issues/202
// ```mermaid
// stateDiagram-v2
//     [*] --> Begin
//     Begin --> DoneWithoutRunning: RunNever
//     Begin --> WaitingDependencySteps: RunConditional
//     WaitingDependencySteps --> WaitingDependencySteps: DependencyStepsRunning
//     WaitingDependencySteps --> Broken: DependencyStepsFinishedBroken
//     WaitingDependencySteps --> CheckingOutputs: DependencyStepsFinishedBrokenIgnored
//     WaitingDependencySteps --> CheckingOutputs: DependencyStepsFinishedSuccessfully
//     CheckingOutputs --> CheckingSuperficialDiffs: OutputsIgnored
//     CheckingOutputs --> CheckingSuperficialDiffs: CheckedOutputs
//     CheckingSuperficialDiffs --> CheckingThoroughDiffs: SuperficialDiffsIgnored
//     CheckingSuperficialDiffs --> ComparingDiffsAndOutputs: SuperficialDiffsNotChanged
//     CheckingSuperficialDiffs --> CheckingThoroughDiffs: SuperficialDiffsChanged
//     CheckingSuperficialDiffs --> Broken: HasMissingDependencies
//     CheckingThoroughDiffs --> ComparingDiffsAndOutputs: ThoroughDiffsNotChanged
//     CheckingThoroughDiffs --> ComparingDiffsAndOutputs: ThoroughDiffsChanged
//     ComparingDiffsAndOutputs --> WaitingToRun: RunAlways
//     ComparingDiffsAndOutputs --> WaitingToRun: DiffsHasChanged
//     ComparingDiffsAndOutputs --> DoneWithoutRunning: DiffsHasNotChanged
//     WaitingToRun --> WaitingToRun: ProcessPoolFull
//     WaitingToRun --> Running: StartProcess
//     WaitingToRun --> Broken: CannotStartProcess
//     Running --> Running: WaitProcess
//     Running --> Broken: ProcessTimeout
//     Running --> DoneByRunning: ProcessCompletedSuccessfully
//     Running --> Broken: ProcessReturnedNonZero
//     Broken --> Broken: KeepBroken
//     DoneByRunning --> DoneByRunning: KeepDone
//     DoneWithoutRunning --> DoneWithoutRunning: KeepDone
//     Broken --> [*]
//     DoneByRunning --> [*]
//     DoneWithoutRunning --> [*]
// ```
358
359state_machine! {
360    XvcStepState {
361        InitialStates { Begin }
362
363        RunNever {
364            Begin => DoneWithoutRunning
365        }
366
367        RunConditional {
368            Begin => WaitingDependencySteps
369        }
370
371        DependencyStepsFinishedBrokenIgnored {
372            WaitingDependencySteps => CheckingOutputs
373        }
374
375
376        DependencyStepsRunning {
377            WaitingDependencySteps => WaitingDependencySteps
378        }
379
380        DependencyStepsFinishedSuccessfully {
381            WaitingDependencySteps => CheckingOutputs
382        }
383
384        DependencyStepsFinishedBroken {
385            WaitingDependencySteps => Broken
386        }
387
388        OutputsIgnored {
389            CheckingOutputs => CheckingSuperficialDiffs
390        }
391
392        CheckedOutputs {
393            CheckingOutputs => CheckingSuperficialDiffs
394        }
395
396        SuperficialDiffsIgnored {
397           CheckingSuperficialDiffs => CheckingThoroughDiffs
398        }
399
400        SuperficialDiffsNotChanged {
401           CheckingSuperficialDiffs => ComparingDiffsAndOutputs
402        }
403
404        SuperficialDiffsChanged {
405           CheckingSuperficialDiffs => CheckingThoroughDiffs
406        }
407
408        HasMissingDependencies {
409            CheckingSuperficialDiffs => Broken
410        }
411
412        ThoroughDiffsNotChanged {
413            CheckingThoroughDiffs => ComparingDiffsAndOutputs
414        }
415
416        ThoroughDiffsChanged {
417            CheckingThoroughDiffs => ComparingDiffsAndOutputs
418        }
419
420        RunAlways {
421            ComparingDiffsAndOutputs => WaitingToRun
422        }
423
424        DiffsHasChanged {
425            ComparingDiffsAndOutputs => WaitingToRun
426        }
427
428        DiffsHasNotChanged {
429            ComparingDiffsAndOutputs => DoneWithoutRunning
430        }
431
432        ProcessPoolFull {
433            WaitingToRun => WaitingToRun
434        }
435
436        StartProcess {
437            WaitingToRun => Running
438        }
439
440        CannotStartProcess {
441            WaitingToRun => Broken
442        }
443
444        WaitProcess {
445            Running => Running
446        }
447
448        ProcessTimeout {
449            Running => Broken
450        }
451
452        ProcessCompletedSuccessfully {
453            Running => DoneByRunning
454        }
455
456        ProcessReturnedNonZero {
457            Running => Broken
458        }
459
460        KeepBroken {
461            Broken => Broken
462        }
463
464        KeepDone {
465            DoneByRunning => DoneByRunning
466        }
467
468        KeepDone {
469            DoneWithoutRunning => DoneWithoutRunning
470        }
471    }
472
473}
xvc_pipeline/pipeline/step.rs

xvc_pipeline/pipeline/
step.rs