Skip to main content

sbom_walker/source/
http.rs

1use crate::{
2    discover::DiscoveredSbom,
3    model::{self, metadata::SourceMetadata},
4    retrieve::RetrievedSbom,
5    source::Source,
6};
7use bytes::{BufMut, Bytes, BytesMut};
8use digest::Digest;
9use futures::try_join;
10use reqwest::Response;
11use sha2::{Sha256, Sha512};
12use std::time::SystemTime;
13use time::{OffsetDateTime, format_description::well_known::Rfc2822};
14use url::{ParseError, Url};
15use walker_common::{
16    changes::{self, ChangeEntry, ChangeSource},
17    fetcher::{self, DataProcessor, Fetcher},
18    retrieve::{RetrievalMetadata, RetrievedDigest, RetrievingDigest},
19    utils::openpgp::PublicKey,
20    validate::source::{Key, KeySource, KeySourceError},
21};
22
23#[non_exhaustive]
24#[derive(Clone, Debug, Default, PartialEq, Eq)]
25pub struct HttpOptions {
26    pub since: Option<SystemTime>,
27    pub keys: Vec<model::metadata::Key>,
28}
29
30impl HttpOptions {
31    pub fn new() -> Self {
32        Self::default()
33    }
34
35    pub fn since(mut self, since: impl Into<Option<SystemTime>>) -> Self {
36        self.since = since.into();
37        self
38    }
39
40    pub fn keys<I>(mut self, keys: I) -> Self
41    where
42        I: IntoIterator<Item = model::metadata::Key>,
43    {
44        self.keys = Vec::from_iter(keys);
45        self
46    }
47
48    pub fn extend_keys<I>(mut self, keys: I) -> Self
49    where
50        I: IntoIterator<Item = model::metadata::Key>,
51    {
52        self.keys.extend(keys);
53        self
54    }
55
56    pub fn add_key(mut self, key: impl Into<model::metadata::Key>) -> Self {
57        self.keys.push(key.into());
58        self
59    }
60}
61
62#[derive(Clone, Debug)]
63pub struct HttpSource {
64    fetcher: Fetcher,
65    url: Url,
66    options: HttpOptions,
67}
68
69impl HttpSource {
70    pub fn new(url: Url, fetcher: Fetcher, options: HttpOptions) -> Self {
71        Self {
72            url,
73            fetcher,
74            options,
75        }
76    }
77}
78
79#[derive(Debug, thiserror::Error)]
80pub enum HttpSourceError {
81    #[error("Fetch error: {0}")]
82    Fetcher(#[from] fetcher::Error),
83    #[error("URL error: {0}")]
84    Url(#[from] ParseError),
85    #[error("CSV error: {0}")]
86    Csv(#[from] csv::Error),
87}
88
89impl From<changes::Error> for HttpSourceError {
90    fn from(value: changes::Error) -> Self {
91        match value {
92            changes::Error::Fetcher(err) => Self::Fetcher(err),
93            changes::Error::Url(err) => Self::Url(err),
94            changes::Error::Csv(err) => Self::Csv(err),
95        }
96    }
97}
98
99impl walker_common::source::Source for HttpSource {
100    type Error = HttpSourceError;
101    type Retrieved = RetrievedSbom;
102}
103
104impl Source for HttpSource {
105    async fn load_metadata(&self) -> Result<SourceMetadata, Self::Error> {
106        Ok(SourceMetadata {
107            keys: self.options.keys.clone(),
108        })
109    }
110
111    async fn load_index(&self) -> Result<Vec<DiscoveredSbom>, Self::Error> {
112        let base = match self.url.path().ends_with('/') {
113            true => self.url.clone(),
114            false => Url::parse(&format!("{}/", self.url))?,
115        };
116
117        let changes = ChangeSource::retrieve(&self.fetcher, &base).await?;
118
119        Ok(changes
120            .entries
121            .into_iter()
122            .map(|ChangeEntry { file, timestamp }| {
123                let modified = timestamp.into();
124                let url = base.join(&file)?;
125
126                Ok::<_, ParseError>(DiscoveredSbom { url, modified })
127            })
128            // filter out advisories based in since, but only if we can be sure
129            .filter(|advisory| match (advisory, &self.options.since) {
130                (Ok(DiscoveredSbom { url: _, modified }), Some(since)) => modified >= since,
131                _ => true,
132            })
133            .collect::<Result<_, _>>()?)
134    }
135
136    async fn load_sbom(&self, discovered: DiscoveredSbom) -> Result<RetrievedSbom, Self::Error> {
137        let (signature, sha256, sha512) = try_join!(
138            self.fetcher
139                .fetch::<Option<String>>(format!("{url}.asc", url = discovered.url)),
140            self.fetcher
141                .fetch::<Option<String>>(format!("{url}.sha256", url = discovered.url)),
142            self.fetcher
143                .fetch::<Option<String>>(format!("{url}.sha512", url = discovered.url)),
144        )?;
145
146        let sha256 = sha256
147            // take the first "word" from the line
148            .and_then(|expected| expected.split(' ').next().map(ToString::to_string))
149            .map(|expected| RetrievingDigest {
150                expected,
151                current: Sha256::new(),
152            });
153        let sha512 = sha512
154            // take the first "word" from the line
155            .and_then(|expected| expected.split(' ').next().map(ToString::to_string))
156            .map(|expected| RetrievingDigest {
157                expected,
158                current: Sha512::new(),
159            });
160
161        let advisory = self
162            .fetcher
163            .fetch_processed(
164                discovered.url.clone(),
165                FetchingRetrievedSbom { sha256, sha512 },
166            )
167            .await?;
168
169        Ok(advisory.into_retrieved(discovered, signature))
170    }
171}
172
173pub struct FetchedRetrievedSbom {
174    data: Bytes,
175    sha256: Option<RetrievedDigest<Sha256>>,
176    sha512: Option<RetrievedDigest<Sha512>>,
177    metadata: RetrievalMetadata,
178}
179
180impl FetchedRetrievedSbom {
181    fn into_retrieved(
182        self,
183        discovered: DiscoveredSbom,
184        signature: Option<String>,
185    ) -> RetrievedSbom {
186        RetrievedSbom {
187            discovered,
188            data: self.data,
189            signature,
190            sha256: self.sha256,
191            sha512: self.sha512,
192            metadata: self.metadata,
193        }
194    }
195}
196
197pub struct FetchingRetrievedSbom {
198    pub sha256: Option<RetrievingDigest<Sha256>>,
199    pub sha512: Option<RetrievingDigest<Sha512>>,
200}
201
202impl DataProcessor for FetchingRetrievedSbom {
203    type Type = FetchedRetrievedSbom;
204
205    async fn process(&self, response: Response) -> Result<Self::Type, reqwest::Error> {
206        let mut response = response.error_for_status()?;
207
208        let mut data = BytesMut::new();
209        let mut sha256 = self.sha256.clone();
210        let mut sha512 = self.sha512.clone();
211
212        while let Some(chunk) = response.chunk().await? {
213            if let Some(d) = &mut sha256 {
214                d.update(&chunk);
215            }
216            if let Some(d) = &mut sha512 {
217                d.update(&chunk);
218            }
219            data.put(chunk);
220        }
221
222        let etag = response
223            .headers()
224            .get(reqwest::header::ETAG)
225            .and_then(|s| s.to_str().ok())
226            .map(ToString::to_string);
227
228        let last_modification = response
229            .headers()
230            .get(reqwest::header::LAST_MODIFIED)
231            .and_then(|s| s.to_str().ok())
232            .and_then(|s| OffsetDateTime::parse(s, &Rfc2822).ok());
233
234        Ok(FetchedRetrievedSbom {
235            data: data.freeze(),
236            sha256: sha256.map(|d| d.into()),
237            sha512: sha512.map(|d| d.into()),
238            metadata: RetrievalMetadata {
239                last_modification,
240                etag,
241            },
242        })
243    }
244}
245
246impl KeySource for HttpSource {
247    type Error = fetcher::Error;
248
249    async fn load_public_key(
250        &self,
251        key_source: Key<'_>,
252    ) -> Result<PublicKey, KeySourceError<Self::Error>> {
253        self.fetcher.load_public_key(key_source).await
254    }
255}