Skip to main content

api_scanner/discovery/
swagger.rs

1use std::collections::HashSet;
2
3use once_cell::sync::Lazy;
4use regex::Regex;
5use serde::Deserialize;
6use tracing::{debug, warn};
7use url::Url;
8
9use crate::{error::CapturedError, http_client::HttpClient};
10
11use super::normalize_path;
12
13static PATH_RE: Lazy<Regex> =
14    Lazy::new(|| Regex::new(r#"["'](/[a-zA-Z0-9_/\-\.\{\}]{2,120})["']"#).unwrap());
15
16// ── Minimal OpenAPI v3 ────────────────────────────────────────────────────────
17
18#[derive(Debug, Deserialize, Default)]
19struct OpenApiV3 {
20    paths: Option<std::collections::HashMap<String, serde_json::Value>>,
21    servers: Option<Vec<ServerObject>>,
22}
23
24#[derive(Debug, Deserialize)]
25struct ServerObject {
26    url: String,
27}
28
29// ── Minimal Swagger v2 ────────────────────────────────────────────────────────
30
31#[derive(Debug, Deserialize, Default)]
32struct SwaggerV2 {
33    paths: Option<std::collections::HashMap<String, serde_json::Value>>,
34    #[serde(rename = "basePath")]
35    base_path: Option<String>,
36    host: Option<String>,
37    schemes: Option<Vec<String>>,
38}
39
40// ── Discovery struct ─────────────────────────────────────────────────────────
41
42pub struct SwaggerDiscovery<'a> {
43    client: &'a HttpClient,
44    base_url: &'a str,
45    host: &'a str,
46}
47
/// Well-known locations where OpenAPI / Swagger documents are commonly served.
///
/// Entries are probed in the order listed; each one is appended to the
/// target's base URL.
static SPEC_PATHS: &[&str] = &[
    // Root-level Swagger documents
    "/swagger.json",
    "/swagger.yaml",
    "/swagger/v1/swagger.json",
    "/swagger/v2/swagger.json",
    // Root-level OpenAPI documents
    "/openapi.json",
    "/openapi.yaml",
    // `api-docs` variants
    "/api-docs",
    "/api-docs.json",
    "/api-docs.yaml",
    // Documents nested under `/api`
    "/api/swagger.json",
    "/api/openapi.json",
    "/api/v1/swagger.json",
    "/api/v2/swagger.json",
    // Version-prefixed documents
    "/v1/swagger.json",
    "/v2/swagger.json",
    "/v3/api-docs", // Spring Boot default
    "/v3/api-docs.yaml",
];
68
69impl<'a> SwaggerDiscovery<'a> {
70    pub fn new(client: &'a HttpClient, base_url: &'a str, host: &'a str) -> Self {
71        Self {
72            client,
73            base_url,
74            host,
75        }
76    }
77
78    pub async fn run(&self) -> (HashSet<String>, Vec<CapturedError>) {
79        let mut paths = HashSet::new();
80        let mut errors = Vec::new();
81
82        let base = self.base_url.trim_end_matches('/');
83
84        for spec_path in SPEC_PATHS {
85            let url = format!("{base}{spec_path}");
86
87            let resp = match self.client.get(&url).await {
88                Ok(r) if r.status < 400 => r,
89                Ok(_) => continue,
90                Err(e) => {
91                    errors.push(e);
92                    continue;
93                }
94            };
95
96            debug!("[swagger] found spec at {url}");
97            self.client.cache_spec(&url, &resp.body);
98            self.parse_spec(&resp.body, &mut paths, &mut errors);
99        }
100
101        debug!("[swagger] total paths extracted: {}", paths.len());
102        (paths, errors)
103    }
104
105    // ── Parse dispatch ────────────────────────────────────────────────────────
106
107    fn parse_spec(&self, body: &str, paths: &mut HashSet<String>, errors: &mut Vec<CapturedError>) {
108        // Try JSON first (most common), then YAML
109        if body.trim_start().starts_with('{') || body.trim_start().starts_with('[') {
110            self.parse_json(body, paths, errors);
111        } else {
112            self.parse_yaml(body, paths, errors);
113        }
114    }
115
116    // ── JSON parsing ──────────────────────────────────────────────────────────
117
118    fn parse_json(&self, body: &str, paths: &mut HashSet<String>, errors: &mut Vec<CapturedError>) {
119        // Detect spec version from raw JSON before full deserialisation
120        let version_hint = body.contains("\"openapi\"");
121
122        if version_hint {
123            match serde_json::from_str::<OpenApiV3>(body) {
124                Ok(spec) => self.harvest_v3(spec, paths),
125                Err(e) => {
126                    warn!("[swagger] OpenAPI v3 parse failed: {e}");
127                    errors.push(CapturedError::parse("swagger/openapi-v3", e.to_string()));
128                    self.fallback_regex(body, paths);
129                }
130            }
131        } else {
132            match serde_json::from_str::<SwaggerV2>(body) {
133                Ok(spec) => self.harvest_v2(spec, paths),
134                Err(e) => {
135                    warn!("[swagger] Swagger v2 parse failed: {e}");
136                    errors.push(CapturedError::parse("swagger/swagger-v2", e.to_string()));
137                    self.fallback_regex(body, paths);
138                }
139            }
140        }
141    }
142
143    // ── YAML parsing ──────────────────────────────────────────────────────────
144
145    fn parse_yaml(&self, body: &str, paths: &mut HashSet<String>, errors: &mut Vec<CapturedError>) {
146        // Try OpenAPI v3 YAML
147        if body.contains("openapi:") {
148            match serde_yml::from_str::<OpenApiV3>(body) {
149                Ok(spec) => {
150                    self.harvest_v3(spec, paths);
151                    return;
152                }
153                Err(e) => {
154                    warn!("[swagger] YAML OpenAPI v3 parse failed: {e}");
155                    errors.push(CapturedError::parse("swagger/yaml-v3", e.to_string()));
156                }
157            }
158        }
159
160        // Try Swagger v2 YAML
161        match serde_yml::from_str::<SwaggerV2>(body) {
162            Ok(spec) => self.harvest_v2(spec, paths),
163            Err(e) => {
164                warn!("[swagger] YAML Swagger v2 parse failed: {e}");
165                errors.push(CapturedError::parse("swagger/yaml-v2", e.to_string()));
166                self.fallback_regex(body, paths);
167            }
168        }
169    }
170
171    // ── Harvesters ────────────────────────────────────────────────────────────
172
173    /// Extract paths from an OpenAPI v3 spec.
174    /// Respects `servers[].url` to build absolute endpoints when possible.
175    fn harvest_v3(&self, spec: OpenApiV3, paths: &mut HashSet<String>) {
176        // Collect server base URLs that belong to this host
177        let server_bases: Vec<String> = spec
178            .servers
179            .unwrap_or_default()
180            .into_iter()
181            .filter_map(|s| {
182                let url = s.url;
183                // Relative server URL (e.g. "/api/v1") — prefix with base
184                if url.starts_with('/') {
185                    return Some(format!("{}{}", self.base_url.trim_end_matches('/'), url));
186                }
187                // Absolute URL — only keep same-host
188                Url::parse(&url)
189                    .ok()
190                    .filter(|u| u.host_str() == Some(self.host))
191                    .map(|u| u.to_string())
192            })
193            .collect();
194
195        for raw_path in spec.paths.unwrap_or_default().into_keys() {
196            // Strip OpenAPI path-templating: /users/{id} → /users/{id} kept as-is
197            // but we still emit it for endpoint enumeration
198            if server_bases.is_empty() {
199                if let Some(p) = normalize_path(&raw_path, self.host) {
200                    paths.insert(p);
201                }
202            } else {
203                for base in &server_bases {
204                    let full = format!(
205                        "{}/{}",
206                        base.trim_end_matches('/'),
207                        raw_path.trim_start_matches('/')
208                    );
209                    if let Some(p) = normalize_path(&full, self.host) {
210                        paths.insert(p);
211                    }
212                }
213            }
214        }
215    }
216
217    /// Extract paths from a Swagger v2 spec.
218    /// Builds the base from `schemes + host + basePath` when available.
219    fn harvest_v2(&self, spec: SwaggerV2, paths: &mut HashSet<String>) {
220        // Try to construct the v2 server base
221        let server_base: Option<String> = spec.host.as_ref().and_then(|h| {
222            // Only use if same host
223            let canonical = h.split(':').next().unwrap_or(h);
224            if canonical != self.host {
225                return None;
226            }
227            let scheme = spec
228                .schemes
229                .as_deref()
230                .unwrap_or(&[])
231                .iter()
232                .find(|s| s.as_str() == "https" || s.as_str() == "http")
233                .map(|s| s.as_str())
234                .unwrap_or("https");
235
236            let bp = spec
237                .base_path
238                .as_deref()
239                .unwrap_or("")
240                .trim_end_matches('/');
241
242            Some(format!("{scheme}://{h}{bp}"))
243        });
244
245        for raw_path in spec.paths.unwrap_or_default().into_keys() {
246            let candidate = if let Some(ref base) = server_base {
247                format!(
248                    "{}/{}",
249                    base.trim_end_matches('/'),
250                    raw_path.trim_start_matches('/')
251                )
252            } else {
253                // Prepend basePath only if it's a relative path
254                let bp = spec
255                    .base_path
256                    .as_deref()
257                    .unwrap_or("")
258                    .trim_end_matches('/');
259                format!("{bp}{raw_path}")
260            };
261
262            if let Some(p) = normalize_path(&candidate, self.host) {
263                paths.insert(p);
264            }
265        }
266    }
267
268    // ── Regex fallback ────────────────────────────────────────────────────────
269
270    /// When structured parsing fails, scrape any path-like strings from the raw body.
271    fn fallback_regex(&self, body: &str, paths: &mut HashSet<String>) {
272        for cap in PATH_RE.captures_iter(body) {
273            let raw = &cap[1];
274            if let Some(p) = normalize_path(raw, self.host) {
275                paths.insert(p);
276            }
277        }
278    }
279}