csaf_walker/metadata/
mod.rs

1use crate::model::metadata::ProviderMetadata;
2use async_trait::async_trait;
3use hickory_resolver::Resolver;
4use sectxtlib::SecurityTxt;
5use std::fmt::Debug;
6use url::Url;
7use walker_common::fetcher::{self, Fetcher, Json};
8
9#[derive(Debug, thiserror::Error)]
10pub enum Error {
11    #[error("failed to parse security.txt: {0}")]
12    SecurityTxt(#[from] sectxtlib::ParseError),
13    #[error("failed to fetch: {0}")]
14    Fetch(#[from] fetcher::Error),
15    #[error("unable to discover metadata")]
16    NotFound,
17    #[error("DNS request failed: {0}")]
18    Dns(#[from] hickory_resolver::ResolveError),
19}
20
21#[async_trait(?Send)]
22pub trait MetadataSource: Debug {
23    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error>;
24}
25
26#[async_trait(?Send)]
27impl MetadataSource for Url {
28    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
29        Ok(fetcher
30            .fetch::<Json<ProviderMetadata>>(self.clone())
31            .await?
32            .into_inner())
33    }
34}
35
36#[async_trait(?Send)]
37impl MetadataSource for &str {
38    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
39        MetadataRetriever::new(*self).load_metadata(fetcher).await
40    }
41}
42
43#[async_trait(?Send)]
44impl MetadataSource for String {
45    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
46        MetadataRetriever::new(self).load_metadata(fetcher).await
47    }
48}
49
/// A metadata source which implements the CSAF provider metadata discovery process.
#[derive(Clone, Debug)]
pub struct MetadataRetriever {
    /// The source to discover from.
    ///
    /// It is first tried as a full URL. If it does not parse as one, it is used as the host part
    /// when composing the well-known, `security.txt` and DNS based locations.
    pub base_url: String,
}
55
56impl MetadataRetriever {
57    pub fn new(base_url: impl Into<String>) -> Self {
58        Self {
59            base_url: base_url.into(),
60        }
61    }
62
63    /// Fetch a security.txt and extract all CSAF entries.
64    ///
65    /// In order for a CSAF entry to be considered, it needs to have a scheme of `https` and parse
66    /// as a URL.
67    pub async fn get_metadata_url_from_security_text(
68        fetcher: &Fetcher,
69        host_url: String,
70    ) -> Result<Option<Url>, Error> {
71        // if we fail to retrieve the `security.txt` other than by a 404, we fail
72        let Some(text) = fetcher.fetch::<Option<String>>(host_url).await? else {
73            return Ok(None);
74        };
75
76        // parse as security.txt and extract the CSAF entry
77        // as of now, we only take the first valid one
78
79        let text = SecurityTxt::parse(&text)?;
80        let url = text
81            .extension
82            .into_iter()
83            .filter(|ext| ext.name == "csaf")
84            .filter_map(|ext| Url::parse(&ext.value).ok())
85            .find(|url| url.scheme() == "https");
86
87        if url.is_none() {
88            log::info!("No CSAF information is existing security.txt");
89        }
90
91        Ok(url)
92    }
93
94    /// Treat the source as a URL and try to retrieve it
95    ///
96    /// If the source is not a URL, we consider it "not found".
97    /// If the URL parses but cannot be found, that's an error.
98    pub async fn approach_full_url(
99        &self,
100        fetcher: &Fetcher,
101    ) -> Result<Option<ProviderMetadata>, Error> {
102        let Ok(url) = Url::parse(&self.base_url) else {
103            return Ok(None);
104        };
105
106        Ok(Some(
107            fetcher
108                .fetch::<Json<ProviderMetadata>>(url)
109                .await?
110                .into_inner(),
111        ))
112    }
113
114    /// Retrieve provider metadata through the full well-known URL.
115    ///
116    /// If retrieving the constructed URL returns a 404, we succeed with `Ok(None)`.
117    pub async fn approach_well_known(
118        &self,
119        fetcher: &Fetcher,
120    ) -> Result<Option<ProviderMetadata>, Error> {
121        let url = format!(
122            "https://{}/.well-known/csaf/provider-metadata.json",
123            self.base_url,
124        );
125
126        log::debug!("Trying to retrieve by well-known approach: {url}");
127
128        Ok(fetcher
129            .fetch::<Option<Json<ProviderMetadata>>>(url)
130            .await?
131            .map(|metadata| metadata.into_inner()))
132    }
133
134    /// Retrieve provider metadata through the DNS path of provided URL.
135    ///
136    /// As it is hard to detect a "host not found" error, compared to any other connection error,
137    /// we do a DNS pre-flight check. If the hostname resolves into an IP address, we assume the
138    /// following HTTP request should not fail due to a "host not found" error.
139    pub async fn approach_dns(&self, fetcher: &Fetcher) -> Result<Option<ProviderMetadata>, Error> {
140        let host = format!("csaf.data.security.{}", self.base_url);
141
142        log::debug!("Trying to retrieve by DNS approach: {host}");
143
144        // DNS pre-flight check
145
146        #[cfg(not(any(unix, target_os = "windows")))]
147        let resolver = Resolver::builder_with_config(
148            hickory_resolver::config::ResolverConfig::default(),
149            TokioConnectionProvider::default(),
150        )?;
151        #[cfg(any(unix, target_os = "windows"))]
152        let resolver = Resolver::builder_tokio()?.build();
153
154        match resolver.lookup_ip(&host).await {
155            Ok(result) => {
156                if result.iter().count() == 0 {
157                    return Ok(None);
158                }
159            }
160            Err(err) if err.is_no_records_found() => {
161                return Ok(None);
162            }
163            Err(err) => {
164                return Err(err.into());
165            }
166        }
167
168        // fetch content
169
170        let url = format!("https://{host}");
171
172        Ok(fetcher
173            .fetch::<Option<Json<ProviderMetadata>>>(url)
174            .await?
175            .map(|value| value.into_inner()))
176    }
177
178    /// Retrieving provider metadata via the security text from the provided URL.
179    ///
180    /// This takes the source as domain, and the provided path to compose a URL. If the security.txt
181    /// cannot be found or doesn't contain a valid CSAF entry, it will return `Ok(None)`.
182    pub async fn approach_security_txt(
183        &self,
184        fetcher: &Fetcher,
185        path: &str,
186    ) -> Result<Option<ProviderMetadata>, Error> {
187        let url = format!("https://{}/{path}", self.base_url);
188
189        log::debug!("Trying to retrieve by security.txt approach: {url}");
190
191        if let Some(url) = Self::get_metadata_url_from_security_text(fetcher, url).await? {
192            // if we fail with a 404, that's an error too, as the security.txt pointed to us towards it
193            Ok(Some(
194                fetcher
195                    .fetch::<Json<ProviderMetadata>>(url)
196                    .await?
197                    .into_inner(),
198            ))
199        } else {
200            Ok(None)
201        }
202    }
203}
204
205#[async_trait(?Send)]
206impl MetadataSource for MetadataRetriever {
207    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
208        // try a full URL first
209
210        if let Some(metadata) = self.approach_full_url(fetcher).await? {
211            return Ok(metadata);
212        }
213
214        // from here on we are following "7.3.1 Finding provider-metadata.json"
215        // see: https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#731-finding-provider-metadatajson
216
217        // well-known approach
218
219        if let Some(metadata) = self.approach_well_known(fetcher).await? {
220            return Ok(metadata);
221        }
222
223        // new security.txt location
224
225        if let Some(metadata) = self
226            .approach_security_txt(fetcher, ".well-known/security.txt")
227            .await?
228        {
229            return Ok(metadata);
230        }
231
232        // legacy security.txt location
233
234        if let Some(metadata) = self.approach_security_txt(fetcher, "security.txt").await? {
235            return Ok(metadata);
236        }
237
238        // DNS approach
239
240        if let Some(metadata) = self.approach_dns(fetcher).await? {
241            return Ok(metadata);
242        }
243
244        // we could not find any metadata
245
246        Err(Error::NotFound)
247    }
248}
249
#[cfg(test)]
mod test {
    use super::*;
    use walker_common::fetcher::FetcherOptions;

    /// The DNS approach must report "nothing found" for a name that should not resolve.
    #[tokio::test]
    async fn test_dns_fail() {
        let fetcher = Fetcher::new(FetcherOptions::default()).await.unwrap();

        let metadata = MetadataRetriever::new("this-should-not-exist")
            .approach_dns(&fetcher)
            .await
            .unwrap();

        assert!(metadata.is_none());
    }

    /// Test a valid DNS case.
    ///
    /// This stays ignored by default: the setup is not under our control and might break at
    /// any moment.
    #[ignore]
    #[tokio::test]
    async fn test_dns_success() {
        let fetcher = Fetcher::new(FetcherOptions::default()).await.unwrap();

        let metadata = MetadataRetriever::new("nozominetworks.com")
            .approach_dns(&fetcher)
            .await
            .unwrap();

        assert!(metadata.is_some());
    }
}