xvc_pipeline/pipeline/deps/
url.rs

//! A step dependency on a URL.
2use crate::{Result, XvcDependency};
3
4use reqwest::blocking::Client as HttpClient;
5use serde::{Deserialize, Serialize};
6use url::Url;
7use xvc_core::types::diff::Diffable;
8use xvc_core::{Diff, HashAlgorithm, UrlContentDigest};
9use xvc_core::persist;
10///
11/// Invalidates when header of the URL get request changes.
12#[derive(Debug, PartialOrd, Ord, Clone, Eq, PartialEq, Serialize, Deserialize)]
13pub struct UrlDigestDep {
14    /// URL like https://example.com/my-file.html
15    pub url: Url,
16    /// ETag header from the HEAD request
17    pub etag: Option<String>,
18    /// Last-Modified header from the HEAD request
19    pub last_modified: Option<String>,
20    /// Digest of the content from the URL
21    pub url_content_digest: Option<UrlContentDigest>,
22}
23
24persist!(UrlDigestDep, "url-dependency");
25
26impl From<UrlDigestDep> for XvcDependency {
27    fn from(val: UrlDigestDep) -> Self {
28        XvcDependency::UrlDigest(val)
29    }
30}
31
32impl UrlDigestDep {
33    /// Create a new URL dependency with the given URL and empty headers and content digest.
34    pub fn new(url: Url) -> Self {
35        Self {
36            url,
37            etag: None,
38            last_modified: None,
39            url_content_digest: None,
40        }
41    }
42
43    /// Make a HEAD request and fill Etag and Last-Modified headers.
44    pub fn update_headers(self) -> Result<Self> {
45        let client = HttpClient::new();
46        let response = client.head(self.url.as_str()).send()?.error_for_status()?;
47        let headers = response.headers();
48
49        let etag = headers.get("ETag").map(|s| s.to_str().unwrap().to_string());
50
51        let last_modified = headers
52            .get("Last-Modified")
53            .map(|s| s.to_str().unwrap().to_string());
54        Ok(Self {
55            etag,
56            last_modified,
57            ..self
58        })
59    }
60
61    /// Make a GET request, download the content and fill the content digest.
62    pub fn update_content_digest(self) -> Result<Self> {
63        let url_get_digest = Some(UrlContentDigest::new(&self.url, HashAlgorithm::Blake3)?);
64        Ok(Self {
65            url_content_digest: url_get_digest,
66            ..self
67        })
68    }
69}
70
71impl Diffable for UrlDigestDep {
72    type Item = UrlDigestDep;
73
74    /// ⚠️ Call actual.update_headers before calling this. ⚠️
75    fn diff_superficial(record: &Self::Item, actual: &Self::Item) -> Diff<Self::Item> {
76        assert!(record.url == actual.url);
77
78        if actual.etag.is_none() && actual.last_modified.is_none() {
79            panic!("No ETag or Last-Modified header found in response")
80        } else {
81            match (
82                &actual.etag,
83                &actual.last_modified,
84                &record.etag,
85                &record.last_modified,
86            ) {
87                (None, None, _, _) => unreachable!("We already checked for this"),
88                (None, Some(_), None, None) => Diff::RecordMissing {
89                    actual: actual.clone(),
90                },
91                (None, Some(act), None, Some(rec)) => {
92                    if *act == *rec {
93                        Diff::Identical
94                    } else {
95                        Diff::Different {
96                            record: record.clone(),
97                            actual: actual.clone(),
98                        }
99                    }
100                }
101                // Headers changed
102                (None, Some(_), Some(_), None) => Diff::Different {
103                    record: record.clone(),
104                    actual: actual.clone(),
105                },
106                (None, Some(_), Some(_), Some(_)) => Diff::Different {
107                    record: record.clone(),
108                    actual: actual.clone(),
109                },
110                (Some(_), None, None, None) => Diff::RecordMissing {
111                    actual: actual.clone(),
112                },
113                (Some(_), None, None, Some(_)) => Diff::Different {
114                    record: record.clone(),
115                    actual: actual.clone(),
116                },
117                (Some(act), None, Some(rec), None) => {
118                    if act == rec {
119                        Diff::Identical
120                    } else {
121                        Diff::Different {
122                            record: record.clone(),
123                            actual: actual.clone(),
124                        }
125                    }
126                }
127
128                (Some(_), None, Some(_), Some(_)) => Diff::Different {
129                    record: record.clone(),
130                    actual: actual.clone(),
131                },
132                (Some(_), Some(_), None, None) => Diff::RecordMissing {
133                    actual: actual.clone(),
134                },
135                (Some(_), Some(_), None, Some(_)) => Diff::Different {
136                    record: record.clone(),
137                    actual: actual.clone(),
138                },
139                (Some(_), Some(_), Some(_), None) => Diff::Different {
140                    record: record.clone(),
141                    actual: actual.clone(),
142                },
143                (Some(act_etag), Some(act_lm), Some(rec_etag), Some(rec_lm)) => {
144                    if act_etag == rec_etag && act_lm == rec_lm {
145                        Diff::Identical
146                    } else {
147                        Diff::Different {
148                            record: record.clone(),
149                            actual: actual.clone(),
150                        }
151                    }
152                }
153            }
154        }
155    }
156
157    /// ⚠️ Call actual.update_content_digest before calling this. ⚠️
158    fn diff_thorough(record: &Self::Item, actual: &Self::Item) -> Diff<Self::Item> {
159        match (record.url_content_digest, actual.url_content_digest) {
160            (None, None) => unreachable!("Both record and actual url content digests are None."),
161            (None, Some(_)) => Diff::RecordMissing {
162                actual: actual.clone(),
163            },
164            (Some(_), None) => Diff::ActualMissing {
165                record: record.clone(),
166            },
167            (Some(rec), Some(act)) => {
168                if rec == act {
169                    Diff::Identical
170                } else {
171                    Diff::Different {
172                        record: record.clone(),
173                        actual: actual.clone(),
174                    }
175                }
176            }
177        }
178    }
179
180    /// ⚠️ Call actual.update_content_digest before calling this. ⚠️
181    fn diff(record: Option<&Self::Item>, actual: Option<&Self::Item>) -> Diff<Self::Item> {
182        match (record, actual) {
183            (None, None) => unreachable!("We should never be diffing None with None"),
184            (None, Some(actual)) => Diff::RecordMissing {
185                actual: actual.clone(),
186            },
187            (Some(record), None) => Diff::ActualMissing {
188                record: record.clone(),
189            },
190            (Some(record), Some(actual)) => Self::diff_thorough(record, actual),
191        }
192    }
193}