1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
use crate::model::metadata::ProviderMetadata;
use async_trait::async_trait;
use hickory_resolver::{
    error::ResolveErrorKind, name_server::TokioConnectionProvider, AsyncResolver,
};
use sectxtlib::SecurityTxt;
use url::Url;
use walker_common::fetcher::{self, Fetcher, Json};

/// Errors which can occur while discovering or loading provider metadata.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    /// A `security.txt` document was retrieved, but could not be parsed.
    #[error("failed to parse security.txt: {0}")]
    SecurityTxt(#[from] sectxtlib::ParseError),
    /// A request made through the fetcher failed.
    #[error("failed to fetch: {0}")]
    Fetch(#[from] fetcher::Error),
    /// None of the discovery approaches yielded provider metadata.
    #[error("unable to discover metadata")]
    NotFound,
    /// Creating the DNS resolver, or the DNS lookup itself, failed.
    #[error("DNS request failed: {0}")]
    Dns(#[from] hickory_resolver::error::ResolveError),
}

/// A source from which provider metadata can be loaded.
///
/// Implemented in this module for [`Url`], which fetches the document directly, and for
/// [`MetadataRetriever`], which tries to discover the document first.
#[async_trait(?Send)]
pub trait MetadataSource {
    /// Load the provider metadata, using `fetcher` to retrieve documents.
    ///
    /// # Errors
    ///
    /// Returns an [`Error`] if the provider metadata could not be loaded.
    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error>;
}

#[async_trait(?Send)]
impl MetadataSource for Url {
    /// Treat the URL itself as the location of the provider metadata document and fetch it.
    ///
    /// No discovery takes place. Any failure reported by the fetcher is propagated.
    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
        // the fetcher takes the URL by value, `self` is only borrowed
        let location = self.clone();
        let document = fetcher.fetch::<Json<ProviderMetadata>>(location).await?;

        Ok(document.into_inner())
    }
}

/// A metadata source implementing the CSAF metadata discovery process.
///
/// The source is first tried as a full URL to a provider metadata document. If it is not a URL,
/// it is treated as a domain and the discovery approaches are tried one after the other.
#[derive(Clone, Debug)]
pub struct MetadataRetriever {
    /// The source to load from: either a full URL to a provider metadata document, or the domain
    /// used for discovery.
    pub base_url: String,
}

impl MetadataRetriever {
    /// Create a new retriever for the provided source.
    ///
    /// The source can either be a full URL to a provider metadata document, or a domain which is
    /// used for discovering the document.
    pub fn new(base_url: impl Into<String>) -> Self {
        Self {
            base_url: base_url.into(),
        }
    }

    /// Fetch a security.txt and extract the first usable CSAF entry.
    ///
    /// In order for a CSAF entry to be considered, it needs to have a scheme of `https` and parse
    /// as a URL.
    ///
    /// Returns `Ok(None)` if the security.txt could not be found, or if it doesn't contain a usable
    /// CSAF entry.
    ///
    /// # Errors
    ///
    /// Fails if fetching the document fails (other than by a 404), or if the content cannot be
    /// parsed as security.txt.
    pub async fn get_metadata_url_from_security_text(
        fetcher: &Fetcher,
        host_url: String,
    ) -> Result<Option<Url>, Error> {
        // if we fail to retrieve the `security.txt` other than by a 404, we fail
        let Some(text) = fetcher.fetch::<Option<String>>(host_url).await? else {
            return Ok(None);
        };

        // parse as security.txt and extract the CSAF entry
        // as of now, we only take the first valid one

        let text = SecurityTxt::parse(&text)?;
        let url = text
            .extension
            .into_iter()
            .filter(|ext| ext.name == "csaf")
            // entries which don't parse as a URL are skipped, they don't fail the process
            .filter_map(|ext| Url::parse(&ext.value).ok())
            .find(|url| url.scheme() == "https");

        Ok(url)
    }

    /// Treat the source as a URL and try to retrieve it
    ///
    /// If the source is not a URL, we consider it "not found". The same is true for a source which
    /// parses as a URL, but has no host (like `example.com:8443`).
    /// If the URL parses but cannot be found, that's an error.
    pub async fn approach_full_url(
        &self,
        fetcher: &Fetcher,
    ) -> Result<Option<ProviderMetadata>, Error> {
        let Ok(url) = Url::parse(&self.base_url) else {
            return Ok(None);
        };

        // A `host:port` style source (like `example.com:8443` or `localhost:8080`) parses
        // successfully: as a URL with the scheme `example.com` and without a host. That is not the
        // location of a document. So instead of failing the fetch, and with that the whole
        // process, we leave it to the discovery approaches.
        if !url.has_host() {
            return Ok(None);
        }

        Ok(Some(
            fetcher
                .fetch::<Json<ProviderMetadata>>(url)
                .await?
                .into_inner(),
        ))
    }

    /// Retrieve provider metadata through the full well-known URL.
    ///
    /// If retrieving the constructed URL returns a 404, we succeed with `Ok(None)`.
    pub async fn approach_well_known(
        &self,
        fetcher: &Fetcher,
    ) -> Result<Option<ProviderMetadata>, Error> {
        let url = format!(
            "https://{}/.well-known/csaf/provider-metadata.json",
            self.base_url,
        );

        log::debug!("Trying to retrieve by well-known approach: {url}");

        Ok(fetcher
            .fetch::<Option<Json<ProviderMetadata>>>(url)
            .await?
            .map(|metadata| metadata.into_inner()))
    }

    /// Retrieve provider metadata through the DNS path of provided URL.
    ///
    /// As it is hard to detect a "host not found" error, compared to any other connection error,
    /// we do a DNS pre-flight check. If the hostname resolves into an IP address, we assume the
    /// following HTTP request should not fail due to a "host not found" error.
    ///
    /// If the hostname doesn't resolve into any IP address, we succeed with `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Fails if the DNS resolver cannot be created, if the lookup fails for any other reason than
    /// "no records found", or if fetching the document fails.
    pub async fn approach_dns(&self, fetcher: &Fetcher) -> Result<Option<ProviderMetadata>, Error> {
        let host = format!("csaf.data.security.{}", self.base_url);

        log::debug!("Trying to retrieve by DNS approach: {host}");

        // DNS pre-flight check

        // the system configuration is only used on unix and windows, otherwise we use the
        // resolver's defaults
        #[cfg(not(any(unix, target_os = "windows")))]
        let resolver = AsyncResolver::new(
            hickory_resolver::config::ResolverConfig::default(),
            hickory_resolver::config::ResolverOpts::default(),
            TokioConnectionProvider::default(),
        )?;
        #[cfg(any(unix, target_os = "windows"))]
        let resolver = AsyncResolver::from_system_conf(TokioConnectionProvider::default())?;

        match resolver.lookup_ip(&host).await {
            Ok(result) => {
                // a successful lookup without any address is as good as "not found"
                if result.iter().next().is_none() {
                    return Ok(None);
                }
            }
            Err(err) if matches!(err.kind(), ResolveErrorKind::NoRecordsFound { .. }) => {
                return Ok(None);
            }
            Err(err) => {
                return Err(err.into());
            }
        }

        // fetch content

        let url = format!("https://{host}");

        Ok(fetcher
            .fetch::<Option<Json<ProviderMetadata>>>(url)
            .await?
            .map(|value| value.into_inner()))
    }

    /// Retrieving provider metadata via the security text from the provided URL.
    ///
    /// This takes the source as domain, and the provided path to compose a URL. If the security.txt
    /// cannot be found or doesn't contain a valid CSAF entry, it will return `Ok(None)`.
    ///
    /// # Errors
    ///
    /// Fails if the security.txt cannot be fetched or parsed, or if the document referenced by the
    /// security.txt cannot be retrieved.
    pub async fn approach_security_txt(
        &self,
        fetcher: &Fetcher,
        path: &str,
    ) -> Result<Option<ProviderMetadata>, Error> {
        let url = format!("https://{}/{path}", self.base_url);

        log::debug!("Trying to retrieve by security.txt approach: {url}");

        if let Some(url) = Self::get_metadata_url_from_security_text(fetcher, url).await? {
            // if we fail with a 404, that's an error too, as the security.txt pointed us towards it
            Ok(Some(
                fetcher
                    .fetch::<Json<ProviderMetadata>>(url)
                    .await?
                    .into_inner(),
            ))
        } else {
            Ok(None)
        }
    }
}

#[async_trait(?Send)]
impl MetadataSource for MetadataRetriever {
    async fn load_metadata(&self, fetcher: &Fetcher) -> Result<ProviderMetadata, Error> {
        // try a full URL first

        if let Some(metadata) = self.approach_full_url(fetcher).await? {
            return Ok(metadata);
        }

        // from here on we are following "7.3.1 Finding provider-metadata.json"
        // see: https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#731-finding-provider-metadatajson

        // well-known approach

        if let Some(metadata) = self.approach_well_known(fetcher).await? {
            return Ok(metadata);
        }

        // new security.txt location

        if let Some(metadata) = self
            .approach_security_txt(fetcher, ".well-known/security.txt")
            .await?
        {
            return Ok(metadata);
        }

        // legacy security.txt location

        if let Some(metadata) = self.approach_security_txt(fetcher, "security.txt").await? {
            return Ok(metadata);
        }

        // DNS approach

        if let Some(metadata) = self.approach_dns(fetcher).await? {
            return Ok(metadata);
        }

        // we could not find any metadata

        Err(Error::NotFound)
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use walker_common::fetcher::FetcherOptions;

    /// Run only the DNS approach for `domain`, using a fetcher with default options.
    ///
    /// Panics if creating the fetcher or the DNS approach itself fails.
    async fn discover_via_dns(domain: &str) -> Option<ProviderMetadata> {
        let fetcher = Fetcher::new(FetcherOptions::default()).await.unwrap();

        MetadataRetriever::new(domain)
            .approach_dns(&fetcher)
            .await
            .unwrap()
    }

    /// A name which doesn't resolve must end up as "nothing found", not as an error.
    #[tokio::test]
    async fn test_dns_fail() {
        let outcome = discover_via_dns("this-should-not-exist").await;

        assert!(outcome.is_none());
    }

    /// Test a valid DNS case.
    ///
    /// This stays ignored: the setup is not under our control and might break at any moment.
    #[ignore]
    #[tokio::test]
    async fn test_dns_success() {
        let outcome = discover_via_dns("nozominetworks.com").await;

        assert!(outcome.is_some());
    }
}