csaf_walker/metadata/
mod.rs

1use crate::model::metadata::ProviderMetadata;
2use async_trait::async_trait;
3use hickory_resolver::{
4    error::ResolveErrorKind, name_server::TokioConnectionProvider, AsyncResolver,
5};
6use sectxtlib::SecurityTxt;
7use std::fmt::Debug;
8use url::Url;
9use walker_common::fetcher::{self, Fetcher, Json};
10
11#[derive(Debug, thiserror::Error)]
12pub enum Error {
13    #[error("failed to parse security.txt: {0}")]
14    SecurityTxt(#[from] sectxtlib::ParseError),
15    #[error("failed to fetch: {0}")]
16    Fetch(#[from] fetcher::Error),
17    #[error("unable to discover metadata")]
18    NotFound,
19    #[error("DNS request failed: {0}")]
20    Dns(#[from] hickory_resolver::error::ResolveError),
21}
22
23#[async_trait(?Send)]
24pub trait MetadataSource: Debug {
25    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error>;
26}
27
28#[async_trait(?Send)]
29impl MetadataSource for Url {
30    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
31        Ok(fetcher
32            .fetch::<Json<ProviderMetadata>>(self.clone())
33            .await?
34            .into_inner())
35    }
36}
37
38#[async_trait(?Send)]
39impl MetadataSource for &str {
40    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
41        MetadataRetriever::new(*self).load_metadata(fetcher).await
42    }
43}
44
45#[async_trait(?Send)]
46impl MetadataSource for String {
47    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
48        MetadataRetriever::new(self).load_metadata(fetcher).await
49    }
50}
51
/// A metadata source which implements the CSAF metadata discovery process.
///
/// The individual discovery steps are available as `approach_*` methods. The
/// [`MetadataSource`] implementation tries them one after the other.
#[derive(Clone, Debug)]
pub struct MetadataRetriever {
    /// The source to discover from.
    ///
    /// It is first tried as a full URL to the metadata document. After that, it is used as
    /// the host part when composing the URLs and host name of the other approaches.
    pub base_url: String,
}
57
58impl MetadataRetriever {
59    pub fn new(base_url: impl Into<String>) -> Self {
60        Self {
61            base_url: base_url.into(),
62        }
63    }
64
65    /// Fetch a security.txt and extract all CSAF entries.
66    ///
67    /// In order for a CSAF entry to be considered, it needs to have a scheme of `https` and parse
68    /// as a URL.
69    pub async fn get_metadata_url_from_security_text(
70        fetcher: &Fetcher,
71        host_url: String,
72    ) -> Result<Option<Url>, Error> {
73        // if we fail to retrieve the `security.txt` other than by a 404, we fail
74        let Some(text) = fetcher.fetch::<Option<String>>(host_url).await? else {
75            return Ok(None);
76        };
77
78        // parse as security.txt and extract the CSAF entry
79        // as of now, we only take the first valid one
80
81        let text = SecurityTxt::parse(&text)?;
82        let url = text
83            .extension
84            .into_iter()
85            .filter(|ext| ext.name == "csaf")
86            .filter_map(|ext| Url::parse(&ext.value).ok())
87            .find(|url| url.scheme() == "https");
88
89        Ok(url)
90    }
91
92    /// Treat the source as a URL and try to retrieve it
93    ///
94    /// If the source is not a URL, we consider it "not found".
95    /// If the URL parses but cannot be found, that's an error.
96    pub async fn approach_full_url(
97        &self,
98        fetcher: &Fetcher,
99    ) -> Result<Option<ProviderMetadata>, Error> {
100        let Ok(url) = Url::parse(&self.base_url) else {
101            return Ok(None);
102        };
103
104        Ok(Some(
105            fetcher
106                .fetch::<Json<ProviderMetadata>>(url)
107                .await?
108                .into_inner(),
109        ))
110    }
111
112    /// Retrieve provider metadata through the full well-known URL.
113    ///
114    /// If retrieving the constructed URL returns a 404, we succeed with `Ok(None)`.
115    pub async fn approach_well_known(
116        &self,
117        fetcher: &Fetcher,
118    ) -> Result<Option<ProviderMetadata>, Error> {
119        let url = format!(
120            "https://{}/.well-known/csaf/provider-metadata.json",
121            self.base_url,
122        );
123
124        log::debug!("Trying to retrieve by well-known approach: {url}");
125
126        Ok(fetcher
127            .fetch::<Option<Json<ProviderMetadata>>>(url)
128            .await?
129            .map(|metadata| metadata.into_inner()))
130    }
131
132    /// Retrieve provider metadata through the DNS path of provided URL.
133    ///
134    /// As it is hard to detect a "host not found" error, compared to any other connection error,
135    /// we do a DNS pre-flight check. If the hostname resolves into an IP address, we assume the
136    /// following HTTP request should not fail due to a "host not found" error.
137    pub async fn approach_dns(&self, fetcher: &Fetcher) -> Result<Option<ProviderMetadata>, Error> {
138        let host = format!("csaf.data.security.{}", self.base_url);
139
140        log::debug!("Trying to retrieve by DNS approach: {host}");
141
142        // DNS pre-flight check
143
144        #[cfg(not(any(unix, target_os = "windows")))]
145        let resolver = AsyncResolver::new(
146            hickory_resolver::config::ResolverConfig::default(),
147            hickory_resolver::config::ResolverOpts::default(),
148            TokioConnectionProvider::default(),
149        )?;
150        #[cfg(any(unix, target_os = "windows"))]
151        let resolver = AsyncResolver::from_system_conf(TokioConnectionProvider::default())?;
152
153        match resolver.lookup_ip(&host).await {
154            Ok(result) => {
155                if result.iter().count() == 0 {
156                    return Ok(None);
157                }
158            }
159            Err(err) if matches!(err.kind(), ResolveErrorKind::NoRecordsFound { .. }) => {
160                return Ok(None);
161            }
162            Err(err) => {
163                return Err(err.into());
164            }
165        }
166
167        // fetch content
168
169        let url = format!("https://{host}");
170
171        Ok(fetcher
172            .fetch::<Option<Json<ProviderMetadata>>>(url)
173            .await?
174            .map(|value| value.into_inner()))
175    }
176
177    /// Retrieving provider metadata via the security text from the provided URL.
178    ///
179    /// This takes the source as domain, and the provided path to compose a URL. If the security.txt
180    /// cannot be found or doesn't contain a valid CSAF entry, it will return `Ok(None)`.
181    pub async fn approach_security_txt(
182        &self,
183        fetcher: &Fetcher,
184        path: &str,
185    ) -> Result<Option<ProviderMetadata>, Error> {
186        let url = format!("https://{}/{path}", self.base_url);
187
188        log::debug!("Trying to retrieve by security.txt approach: {url}");
189
190        if let Some(url) = Self::get_metadata_url_from_security_text(fetcher, url).await? {
191            // if we fail with a 404, that's an error too, as the security.txt pointed to us towards it
192            Ok(Some(
193                fetcher
194                    .fetch::<Json<ProviderMetadata>>(url)
195                    .await?
196                    .into_inner(),
197            ))
198        } else {
199            Ok(None)
200        }
201    }
202}
203
204#[async_trait(?Send)]
205impl MetadataSource for MetadataRetriever {
206    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
207        // try a full URL first
208
209        if let Some(metadata) = self.approach_full_url(fetcher).await? {
210            return Ok(metadata);
211        }
212
213        // from here on we are following "7.3.1 Finding provider-metadata.json"
214        // see: https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#731-finding-provider-metadatajson
215
216        // well-known approach
217
218        if let Some(metadata) = self.approach_well_known(fetcher).await? {
219            return Ok(metadata);
220        }
221
222        // new security.txt location
223
224        if let Some(metadata) = self
225            .approach_security_txt(fetcher, ".well-known/security.txt")
226            .await?
227        {
228            return Ok(metadata);
229        }
230
231        // legacy security.txt location
232
233        if let Some(metadata) = self.approach_security_txt(fetcher, "security.txt").await? {
234            return Ok(metadata);
235        }
236
237        // DNS approach
238
239        if let Some(metadata) = self.approach_dns(fetcher).await? {
240            return Ok(metadata);
241        }
242
243        // we could not find any metadata
244
245        Err(Error::NotFound)
246    }
247}
248
#[cfg(test)]
mod test {
    use super::*;
    use walker_common::fetcher::FetcherOptions;

    /// A host which does not resolve must lead to "nothing found", not to an error.
    #[tokio::test]
    async fn test_dns_fail() {
        let fetcher = Fetcher::new(FetcherOptions::default()).await.unwrap();

        let outcome = MetadataRetriever::new("this-should-not-exist")
            .approach_dns(&fetcher)
            .await
            .unwrap();

        assert!(outcome.is_none());
    }

    /// Test a valid DNS case.
    ///
    /// This test stays ignored by default: we don't control the remote setup, so it might
    /// break at any moment.
    #[ignore]
    #[tokio::test]
    async fn test_dns_success() {
        let fetcher = Fetcher::new(FetcherOptions::default()).await.unwrap();

        let outcome = MetadataRetriever::new("nozominetworks.com")
            .approach_dns(&fetcher)
            .await
            .unwrap();

        assert!(outcome.is_some());
    }
}