xvc_pipeline/pipeline/deps/
mod.rs

1//! Step dependencies implementation
2pub mod compare;
3pub mod file;
4pub mod generic;
5pub mod glob;
6pub mod glob_items;
7pub mod line_items;
8pub mod lines;
9pub mod param;
10pub mod regex;
11pub mod regex_items;
12pub mod sqlite_query;
13pub mod step;
14pub mod url;
15
16use std::fmt::{Display, Formatter};
17
18use itertools::Itertools;
19pub use param::*;
20
21use serde::{Deserialize, Serialize};
22use xvc_core::XvcPathMetadataProvider;
23
24use crate::error::{Error, Result};
25use xvc_core::XvcConfig;
26use xvc_core::{glob_includes, glob_paths, XvcPath, XvcPathMetadataMap, XvcRoot};
27use xvc_core::{persist, HStore, XvcStore};
28
29pub use self::file::FileDep;
30pub use self::generic::GenericDep;
31pub use self::glob::GlobDep;
32pub use self::glob_items::GlobItemsDep;
33pub use self::line_items::LineItemsDep;
34pub use self::lines::LinesDep;
35pub use self::regex::RegexDep;
36pub use self::regex_items::RegexItemsDep;
37pub use self::sqlite_query::SqliteQueryDep;
38pub use self::step::StepDep;
39pub use self::url::UrlDigestDep;
40
41/// Return default name for the params file from the config
42pub fn conf_params_file(conf: &XvcConfig) -> Result<String> {
43    Ok(conf.get_str("pipeline.default_params_file")?.option)
44}
45
46/// Represents variety of dependencies Xvc supports.
47/// This is to unify all dependencies without dynamic dispatch and having
48/// compile time errors when we miss something about dependencies.
49#[derive(Debug, PartialOrd, Ord, Clone, Eq, PartialEq, Serialize, Deserialize)]
50pub enum XvcDependency {
51    /// Explicitly defined step depenedencies
52    Step(StepDep),
53    /// Dependencies which checks the change of output of a shell command
54    Generic(GenericDep),
55    /// Invalidates when the file content changes.
56    File(FileDep),
57    /// Invalidates when contents in any of the files this glob describes. Keeps track of
58    /// individual files.
59    GlobItems(GlobItemsDep),
60    /// Invalidates when contents in any of the files this glob describes. Doesn't keep track of
61    /// individual files.
62    Glob(GlobDep),
63    /// A dependency to a set of lines defined by a regex. Keeps track of individual lines.
64    RegexItems(RegexItemsDep),
65    /// A dependency to a set of lines defined by a regex. Doesn't keep track of individual lines.
66    Regex(RegexDep),
67    /// A dependency to a parameter in JSON, YAML or TOML file.
68    Param(ParamDep),
69    /// A dependenci to a set of lines defined by a range. Keeps track of individual lines.
70    LineItems(LineItemsDep),
71    /// A dependenci to a set of lines defined by a range. Doesn't keep track of individual lines.
72    Lines(LinesDep),
73
74    /// A dependency to a URL's content
75    UrlDigest(UrlDigestDep),
76
77    /// A dependency to an SQLite Query, that invalidates when the query results change
78    SqliteQueryDigest(SqliteQueryDep),
79    // TODO: Slice {path, begin, length} to specify portions of binary files
80    // TODO: DatabaseTable { database, table } to specify particular tables from databases
81    // TODO: DatabaseQuery { database, query } to specify the result of queries
82    // TODO: GraphQL { url, query } to specify a graphql
83    // TODO: S3 { url } to specify S3 buckets
84    // TODO: REST { url } to make Rest queries
85    // TODO: Bitcoin { wallet } to check Bitcoin wallets
86    // TODO: JupyterNotebook { file, cell }
87    // TODO: EnvironmentVariable { name }
88    // TODO: PythonFunc {file, name}
89    // TODO: PythonClass {file, name}
90}
91
92persist!(XvcDependency, "xvc-dependency");
93
94impl Display for XvcDependency {
95    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
96        match self {
97            XvcDependency::Step(dep) => write!(f, "step({})", dep.name),
98            XvcDependency::Generic(dep) => write!(f, "generic({})", dep.generic_command),
99            XvcDependency::File(dep) => write!(f, "file({})", dep.path),
100            XvcDependency::GlobItems(dep) => write!(f, "glob-items({})", dep.glob),
101            XvcDependency::Glob(dep) => write!(f, "glob({})", dep.glob),
102            XvcDependency::RegexItems(dep) => write!(f, "regex-items({})", dep.path),
103            XvcDependency::Regex(dep) => write!(f, "regex({}:/{})", dep.path, dep.regex),
104            XvcDependency::Param(dep) => write!(f, "param({}::{})", dep.path, dep.key),
105            XvcDependency::LineItems(dep) => {
106                write!(f, "line-items({}::{}-{})", dep.path, dep.begin, dep.end)
107            }
108            XvcDependency::Lines(dep) => {
109                write!(f, "lines({}::{}-{})", dep.path, dep.begin, dep.end)
110            }
111            XvcDependency::UrlDigest(dep) => write!(f, "url-digest({})", dep.url),
112            XvcDependency::SqliteQueryDigest(dep) => {
113                write!(f, "sqlite({}:\"{}\")", dep.path, dep.query)
114            }
115        }
116    }
117}
118
119impl XvcDependency {
120    /// Returns the path of the dependency if it has a single path.
121    pub fn xvc_path(&self) -> Option<XvcPath> {
122        match self {
123            XvcDependency::File(file_dep) => Some(file_dep.path.clone()),
124            XvcDependency::RegexItems(dep) => Some(dep.path.clone()),
125            XvcDependency::Regex(dep) => Some(dep.path.clone()),
126            XvcDependency::Param(dep) => Some(dep.path.clone()),
127            XvcDependency::LineItems(dep) => Some(dep.path.clone()),
128            XvcDependency::Lines(dep) => Some(dep.path.clone()),
129            XvcDependency::SqliteQueryDigest(dep) => Some(dep.path.clone()),
130            XvcDependency::Step(_) => None,
131            XvcDependency::Generic(_) => None,
132            XvcDependency::GlobItems(_) => None,
133            XvcDependency::Glob(_) => None,
134            XvcDependency::UrlDigest(_) => None,
135        }
136    }
137
138    /// Send a list of items if the dependency has a list of items. Otherwise returns None.
139    pub fn items(&self) -> Option<Vec<String>> {
140        match self {
141            XvcDependency::GlobItems(dep) => Some(
142                dep.xvc_path_metadata_map
143                    .keys()
144                    .map(|xp| xp.to_string())
145                    .sorted()
146                    .collect::<Vec<String>>(),
147            ),
148            XvcDependency::RegexItems(dep) => {
149                Some(dep.lines.clone().into_iter().sorted().collect())
150            }
151            XvcDependency::LineItems(dep) => Some(dep.lines.clone().into_iter().sorted().collect()),
152
153            XvcDependency::Step(_)
154            | XvcDependency::Generic(_)
155            | XvcDependency::File(_)
156            | XvcDependency::Glob(_)
157            | XvcDependency::Regex(_)
158            | XvcDependency::Param(_)
159            | XvcDependency::Lines(_)
160            | XvcDependency::SqliteQueryDigest(_)
161            | XvcDependency::UrlDigest(_) => None,
162        }
163    }
164}
165
166/// Returns steps that depend to `to_path`
167/// For dependencies with a single file `path`, these makes equality checks.
168/// For `XvcDependency::Glob ( glob )`, it checks whether `to_path` is in the paths of the dep.
169/// Note that for granular dependencies (`Param`, `Regex`, `Lines`), there may be required further
170/// checks whether the step actually depends to `to_path`, but as we don't have outputs that are
171/// described more granular than a file, it simply assumes if `step-A` writes to `file-A`, any
172/// other step that depends on `file-A` is a dependency to `step-A`.
173pub fn dependencies_to_path(
174    xvc_root: &XvcRoot,
175    pmp: &XvcPathMetadataProvider,
176    pipeline_rundir: &XvcPath,
177    all_deps: &XvcStore<XvcDependency>,
178    to_path: &XvcPath,
179) -> HStore<XvcDependency> {
180    let mut deps_to_path = HStore::<XvcDependency>::with_capacity(all_deps.len());
181    for (dep_e, dep) in all_deps.iter() {
182        let has_path = match dep {
183            XvcDependency::Glob(dep) => {
184                glob_includes(xvc_root, pmp, pipeline_rundir, dep.glob.as_str(), to_path)
185                    .unwrap_or_else(|e| {
186                        e.warn();
187                        false
188                    })
189            }
190            XvcDependency::File(dep) => dep.path == *to_path,
191            XvcDependency::GlobItems(dep) => dep.xvc_path_metadata_map.keys().contains(to_path),
192            XvcDependency::RegexItems(dep) => dep.path == *to_path,
193            XvcDependency::Regex(dep) => dep.path == *to_path,
194            XvcDependency::Param(dep) => dep.path == *to_path,
195            XvcDependency::LineItems(dep) => dep.path == *to_path,
196            XvcDependency::Lines(dep) => dep.path == *to_path,
197            XvcDependency::SqliteQueryDigest(dep) => dep.path == *to_path,
198            XvcDependency::Generic(_) | XvcDependency::Step(_) | XvcDependency::UrlDigest(_) => {
199                false
200            }
201        };
202
203        if has_path {
204            deps_to_path.insert(*dep_e, dep.clone());
205        }
206    }
207    deps_to_path
208}
209
210/// Returns the local paths associated with a dependency. Some dependency types (Pipeline, Step, URL) don't have local paths.
211pub fn dependency_paths(
212    xvc_root: &XvcRoot,
213    pmp: &XvcPathMetadataProvider,
214    pipeline_rundir: &XvcPath,
215    dep: &XvcDependency,
216) -> XvcPathMetadataMap {
217    let make_map = |xp: &XvcPath| {
218        let mut result_map = XvcPathMetadataMap::with_capacity(1);
219        match pmp.get(xp) {
220            Some(md) => {
221                result_map.insert(xp.clone(), md);
222            }
223            None => {
224                Error::PathNotFound {
225                    path: xp.to_absolute_path(xvc_root).as_os_str().to_owned(),
226                }
227                .warn();
228            }
229        }
230        result_map
231    };
232
233    let empty = XvcPathMetadataMap::with_capacity(0);
234    match dep {
235        XvcDependency::Generic(_) => empty,
236        XvcDependency::Step(_) => empty,
237        XvcDependency::File(dep) => make_map(&dep.path),
238        XvcDependency::GlobItems(dep) => dep
239            .xvc_path_metadata_map
240            .iter()
241            .map(|(xp, xmd)| (xp.clone(), *xmd))
242            .collect(),
243        XvcDependency::Glob(dep) => glob_paths(pmp, pipeline_rundir, &dep.glob).unwrap(),
244        XvcDependency::UrlDigest(_) => empty,
245        XvcDependency::Param(dep) => make_map(&dep.path),
246        XvcDependency::RegexItems(dep) => make_map(&dep.path),
247        XvcDependency::LineItems(dep) => make_map(&dep.path),
248        XvcDependency::Regex(dep) => make_map(&dep.path),
249        XvcDependency::Lines(dep) => make_map(&dep.path),
250        XvcDependency::SqliteQueryDigest(dep) => make_map(&dep.path),
251    }
252}