xvc_pipeline/pipeline/deps/
regex.rs

1//! A regex dependency is a dependency that is based on a regex search in a text file.
2use std::io::{self, BufRead};
3
4use itertools::Itertools;
5use regex::Regex;
6use serde::{Deserialize, Serialize};
7use xvc_core::types::diff::Diffable;
8use xvc_core::{ContentDigest, Diff, HashAlgorithm, XvcDigest, XvcMetadata, XvcPath, XvcRoot};
9use xvc_core::persist;
10
11use crate::XvcDependency;
12
/// A step dependency on the lines of a text file that match a regular expression.
///
/// The dependency records the file's path, the pattern, a digest of the matching lines and the
/// file's metadata. The digest and the metadata are optional and start out as `None` when the
/// value is built with [`RegexDep::new`].
#[derive(Debug, PartialOrd, Ord, Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct RegexDep {
    /// Path of the file in the workspace
    pub path: XvcPath,
    /// The regex to search for in the file
    // Kept as a `String` because `Regex` is not serializable; it is compiled on demand by
    // `RegexDep::regex`.
    pub regex: String,
    /// The digest of the lines that match the regex, `None` until it is computed
    pub lines_digest: Option<ContentDigest>,
    /// The metadata of the file, `None` until it is set
    pub xvc_metadata: Option<XvcMetadata>,
}
26
// Invokes `xvc_core::persist!` for `RegexDep` with the name "regex-digest-dependency".
// NOTE(review): presumably this name identifies the type in Xvc's on-disk store, so changing it
// would orphan existing records — confirm against the macro definition before renaming.
persist!(RegexDep, "regex-digest-dependency");
28
29impl From<RegexDep> for XvcDependency {
30    fn from(val: RegexDep) -> Self {
31        XvcDependency::Regex(val)
32    }
33}
34
35impl RegexDep {
36    /// Create a new RegexDep with empty metadata and digest
37    pub fn new(path: XvcPath, regex: String) -> Self {
38        Self {
39            path,
40            regex,
41            lines_digest: None,
42            xvc_metadata: None,
43        }
44    }
45    /// Update the metadata of the file
46    pub fn update_metadata(self, xvc_metadata: Option<XvcMetadata>) -> Self {
47        Self {
48            xvc_metadata,
49            ..self
50        }
51    }
52
53    /// Update the digest of the file by reading the file and collecting all lines that match the regex
54    pub fn update_digest(self, xvc_root: &XvcRoot, algorithm: HashAlgorithm) -> Self {
55        let path = self.path.to_absolute_path(xvc_root);
56        let regex = self.regex();
57        let file = std::fs::File::open(path).unwrap();
58        let lines = io::BufReader::new(file).lines();
59        let matching_lines = lines
60            .filter_map(|line| {
61                if let Ok(line) = line {
62                    if regex.is_match(&line) {
63                        Some(line)
64                    } else {
65                        None
66                    }
67                } else {
68                    None
69                }
70            })
71            .join("");
72
73        let lines_digest = Some(XvcDigest::from_content(&matching_lines, algorithm).into());
74        Self {
75            lines_digest,
76            ..self
77        }
78    }
79
80    /// Returns the inner regex
81    pub fn regex(&self) -> Regex {
82        Regex::new(&self.regex).unwrap()
83    }
84}
85
86impl Diffable for RegexDep {
87    type Item = Self;
88
89    /// ⚠️  Update the metadata with actual.update_metadata before calling this function
90    fn diff_superficial(record: &Self::Item, actual: &Self::Item) -> Diff<Self::Item> {
91        assert!(record.path == actual.path);
92
93        match (record.xvc_metadata, actual.xvc_metadata) {
94            (Some(rec_md), Some(act_md)) => {
95                if rec_md == act_md {
96                    Diff::Identical
97                } else {
98                    Diff::Different {
99                        record: record.clone(),
100                        actual: actual.clone(),
101                    }
102                }
103            }
104            (None, Some(_)) => Diff::RecordMissing {
105                actual: actual.clone(),
106            },
107            (Some(_), None) => Diff::ActualMissing {
108                record: record.clone(),
109            },
110            (None, None) => unreachable!("Either record or actual should have metadata"),
111        }
112    }
113
114    /// ⚠️  Update the metadata and lines with actual.update_digest before calling this function
115    fn diff_thorough(record: &Self::Item, actual: &Self::Item) -> Diff<Self::Item> {
116        assert!(record.path == actual.path);
117        if record.lines_digest == actual.lines_digest {
118            Diff::Identical
119        } else {
120            Diff::Different {
121                record: record.clone(),
122                actual: actual.clone(),
123            }
124        }
125    }
126}