// csaf_walker/metadata/mod.rs

1use crate::model::metadata::ProviderMetadata;
2use async_trait::async_trait;
3use hickory_resolver::Resolver;
4use sectxtlib::SecurityTxt;
5use std::fmt::Debug;
6use url::Url;
7use walker_common::fetcher::{self, Fetcher, Json};
8
9#[derive(Debug, thiserror::Error)]
10pub enum Error {
11    #[error("failed to parse security.txt: {0}")]
12    SecurityTxt(#[from] sectxtlib::ParseError),
13    #[error("failed to fetch: {0}")]
14    Fetch(#[from] fetcher::Error),
15    #[error("unable to discover metadata")]
16    NotFound,
17    #[error("DNS request failed: {0}")]
18    Dns(#[from] hickory_resolver::ResolveError),
19}
20
21#[async_trait(?Send)]
22pub trait MetadataSource: Debug {
23    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error>;
24}
25
26#[async_trait(?Send)]
27impl MetadataSource for Url {
28    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
29        Ok(fetcher
30            .fetch::<Json<ProviderMetadata>>(self.clone())
31            .await?
32            .into_inner())
33    }
34}
35
36#[async_trait(?Send)]
37impl MetadataSource for &str {
38    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
39        MetadataRetriever::new(*self).load_metadata(fetcher).await
40    }
41}
42
43#[async_trait(?Send)]
44impl MetadataSource for String {
45    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
46        MetadataRetriever::new(self).load_metadata(fetcher).await
47    }
48}
49
/// Metadata source that walks through the CSAF provider metadata discovery process.
///
/// The individual discovery steps are available as `approach_*` methods.
#[derive(Clone, Debug)]
pub struct MetadataRetriever {
    /// The source to discover from.
    ///
    /// It is first tried as a full URL; the remaining approaches use it as the
    /// host part when composing `https://` URLs.
    pub base_url: String,
}
55
56impl MetadataRetriever {
57    pub fn new(base_url: impl Into<String>) -> Self {
58        Self {
59            base_url: base_url.into(),
60        }
61    }
62
63    /// Fetch a security.txt and extract all CSAF entries.
64    ///
65    /// In order for a CSAF entry to be considered, it needs to have a scheme of `https` and parse
66    /// as a URL.
67    pub async fn get_metadata_url_from_security_text(
68        fetcher: &Fetcher,
69        host_url: String,
70    ) -> Result<Option<Url>, Error> {
71        // if we fail to retrieve the `security.txt` other than by a 404, we fail
72        let Some(text) = fetcher.fetch::<Option<String>>(host_url).await? else {
73            return Ok(None);
74        };
75
76        // parse as security.txt and extract the CSAF entry
77        // as of now, we only take the first valid one
78
79        let text = SecurityTxt::parse(&text)?;
80        let url = text
81            .extension
82            .into_iter()
83            .filter(|ext| ext.name == "csaf")
84            .filter_map(|ext| Url::parse(&ext.value).ok())
85            .find(|url| url.scheme() == "https");
86
87        Ok(url)
88    }
89
90    /// Treat the source as a URL and try to retrieve it
91    ///
92    /// If the source is not a URL, we consider it "not found".
93    /// If the URL parses but cannot be found, that's an error.
94    pub async fn approach_full_url(
95        &self,
96        fetcher: &Fetcher,
97    ) -> Result<Option<ProviderMetadata>, Error> {
98        let Ok(url) = Url::parse(&self.base_url) else {
99            return Ok(None);
100        };
101
102        Ok(Some(
103            fetcher
104                .fetch::<Json<ProviderMetadata>>(url)
105                .await?
106                .into_inner(),
107        ))
108    }
109
110    /// Retrieve provider metadata through the full well-known URL.
111    ///
112    /// If retrieving the constructed URL returns a 404, we succeed with `Ok(None)`.
113    pub async fn approach_well_known(
114        &self,
115        fetcher: &Fetcher,
116    ) -> Result<Option<ProviderMetadata>, Error> {
117        let url = format!(
118            "https://{}/.well-known/csaf/provider-metadata.json",
119            self.base_url,
120        );
121
122        log::debug!("Trying to retrieve by well-known approach: {url}");
123
124        Ok(fetcher
125            .fetch::<Option<Json<ProviderMetadata>>>(url)
126            .await?
127            .map(|metadata| metadata.into_inner()))
128    }
129
130    /// Retrieve provider metadata through the DNS path of provided URL.
131    ///
132    /// As it is hard to detect a "host not found" error, compared to any other connection error,
133    /// we do a DNS pre-flight check. If the hostname resolves into an IP address, we assume the
134    /// following HTTP request should not fail due to a "host not found" error.
135    pub async fn approach_dns(&self, fetcher: &Fetcher) -> Result<Option<ProviderMetadata>, Error> {
136        let host = format!("csaf.data.security.{}", self.base_url);
137
138        log::debug!("Trying to retrieve by DNS approach: {host}");
139
140        // DNS pre-flight check
141
142        #[cfg(not(any(unix, target_os = "windows")))]
143        let resolver = Resolver::builder_with_config(
144            hickory_resolver::config::ResolverConfig::default(),
145            TokioConnectionProvider::default(),
146        )?;
147        #[cfg(any(unix, target_os = "windows"))]
148        let resolver = Resolver::builder_tokio()?.build();
149
150        match resolver.lookup_ip(&host).await {
151            Ok(result) => {
152                if result.iter().count() == 0 {
153                    return Ok(None);
154                }
155            }
156            Err(err) if err.is_no_records_found() => {
157                return Ok(None);
158            }
159            Err(err) => {
160                return Err(err.into());
161            }
162        }
163
164        // fetch content
165
166        let url = format!("https://{host}");
167
168        Ok(fetcher
169            .fetch::<Option<Json<ProviderMetadata>>>(url)
170            .await?
171            .map(|value| value.into_inner()))
172    }
173
174    /// Retrieving provider metadata via the security text from the provided URL.
175    ///
176    /// This takes the source as domain, and the provided path to compose a URL. If the security.txt
177    /// cannot be found or doesn't contain a valid CSAF entry, it will return `Ok(None)`.
178    pub async fn approach_security_txt(
179        &self,
180        fetcher: &Fetcher,
181        path: &str,
182    ) -> Result<Option<ProviderMetadata>, Error> {
183        let url = format!("https://{}/{path}", self.base_url);
184
185        log::debug!("Trying to retrieve by security.txt approach: {url}");
186
187        if let Some(url) = Self::get_metadata_url_from_security_text(fetcher, url).await? {
188            // if we fail with a 404, that's an error too, as the security.txt pointed to us towards it
189            Ok(Some(
190                fetcher
191                    .fetch::<Json<ProviderMetadata>>(url)
192                    .await?
193                    .into_inner(),
194            ))
195        } else {
196            Ok(None)
197        }
198    }
199}
200
201#[async_trait(?Send)]
202impl MetadataSource for MetadataRetriever {
203    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
204        // try a full URL first
205
206        if let Some(metadata) = self.approach_full_url(fetcher).await? {
207            return Ok(metadata);
208        }
209
210        // from here on we are following "7.3.1 Finding provider-metadata.json"
211        // see: https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#731-finding-provider-metadatajson
212
213        // well-known approach
214
215        if let Some(metadata) = self.approach_well_known(fetcher).await? {
216            return Ok(metadata);
217        }
218
219        // new security.txt location
220
221        if let Some(metadata) = self
222            .approach_security_txt(fetcher, ".well-known/security.txt")
223            .await?
224        {
225            return Ok(metadata);
226        }
227
228        // legacy security.txt location
229
230        if let Some(metadata) = self.approach_security_txt(fetcher, "security.txt").await? {
231            return Ok(metadata);
232        }
233
234        // DNS approach
235
236        if let Some(metadata) = self.approach_dns(fetcher).await? {
237            return Ok(metadata);
238        }
239
240        // we could not find any metadata
241
242        Err(Error::NotFound)
243    }
244}
245
#[cfg(test)]
mod test {
    use super::*;
    use walker_common::fetcher::FetcherOptions;

    /// Run the DNS approach for `source`, using a fetcher with default options.
    ///
    /// Panics if creating the fetcher or running the approach fails.
    async fn dns_approach(source: &str) -> Option<ProviderMetadata> {
        let fetcher = Fetcher::new(FetcherOptions::default()).await.unwrap();
        let retriever = MetadataRetriever::new(source);

        retriever.approach_dns(&fetcher).await.unwrap()
    }

    /// A host name that does not resolve must yield "not found" instead of an error.
    #[tokio::test]
    async fn test_dns_fail() {
        let result = dns_approach("this-should-not-exist").await;

        assert!(result.is_none());
    }

    /// Test a valid DNS case.
    ///
    /// We can't just enable this test, as we don't control this setup, it might break at
    /// any moment.
    #[ignore]
    #[tokio::test]
    async fn test_dns_success() {
        let result = dns_approach("nozominetworks.com").await;

        assert!(result.is_some());
    }
}