Skip to main content

api_scanner/discovery/
headers.rs

1use std::collections::HashSet;
2
3use tracing::debug;
4
5use crate::{error::CapturedError, http_client::HttpClient};
6
7use super::normalize_path;
8
/// Names of the response headers that are inspected for navigational targets
/// (internal API paths or links) during header discovery.
const LINK_HEADERS: &[&str] = &[
    "link",
    "location",
    "x-redirect-to",
    "content-location",
];
11
12pub struct HeaderDiscovery<'a> {
13    client: &'a HttpClient,
14    base_url: &'a str,
15    host: &'a str,
16}
17
18impl<'a> HeaderDiscovery<'a> {
19    pub fn new(client: &'a HttpClient, base_url: &'a str, host: &'a str) -> Self {
20        Self {
21            client,
22            base_url,
23            host,
24        }
25    }
26
27    /// Probe the root URL (GET + HEAD) and extract navigational paths from headers.
28    pub async fn run(&self) -> (HashSet<String>, Vec<CapturedError>) {
29        let mut paths = HashSet::new();
30        let mut errors = Vec::new();
31
32        for probe in &[
33            self.client.get(self.base_url).await,
34            self.client.head(self.base_url).await,
35        ] {
36            match probe {
37                Ok(resp) => {
38                    for key in LINK_HEADERS {
39                        if let Some(val) = resp.header(key) {
40                            for raw in self.extract_link_targets(val) {
41                                if let Some(p) = normalize_path(&raw, self.host) {
42                                    paths.insert(p);
43                                }
44                            }
45                        }
46                    }
47                }
48                Err(e) => errors.push(e.clone()),
49            }
50        }
51
52        debug!("[headers] found {} paths", paths.len());
53        (paths, errors)
54    }
55
56    /// Parse RFC 5988 Link header values like:
57    /// `</api/v2>; rel="next", </docs>; rel="help"`
58    fn extract_link_targets(&self, header_val: &str) -> Vec<String> {
59        header_val
60            .split(',')
61            .filter_map(|part| {
62                // Extract the <...> URI reference from each link item
63                let start = part.find('<')?;
64                let end = part.find('>')?;
65                if end > start {
66                    Some(part[start + 1..end].trim().to_string())
67                } else {
68                    None
69                }
70            })
71            .collect()
72    }
73}