1use std::collections::HashSet;
2
3use once_cell::sync::Lazy;
4use regex::Regex;
5use serde::Deserialize;
6use tracing::{debug, warn};
7use url::Url;
8
9use crate::{error::CapturedError, http_client::HttpClient};
10
11use super::normalize_path;
12
13static PATH_RE: Lazy<Regex> =
14 Lazy::new(|| Regex::new(r#"["'](/[a-zA-Z0-9_/\-\.\{\}]{2,120})["']"#).unwrap());
15
16#[derive(Debug, Deserialize, Default)]
19struct OpenApiV3 {
20 paths: Option<std::collections::HashMap<String, serde_json::Value>>,
21 servers: Option<Vec<ServerObject>>,
22}
23
24#[derive(Debug, Deserialize)]
25struct ServerObject {
26 url: String,
27}
28
29#[derive(Debug, Deserialize, Default)]
32struct SwaggerV2 {
33 paths: Option<std::collections::HashMap<String, serde_json::Value>>,
34 #[serde(rename = "basePath")]
35 base_path: Option<String>,
36 host: Option<String>,
37 schemes: Option<Vec<String>>,
38}
39
40pub struct SwaggerDiscovery<'a> {
43 client: &'a HttpClient,
44 base_url: &'a str,
45 host: &'a str,
46}
47
/// Candidate spec locations, relative to the base URL, probed in this order.
///
/// Every entry starts with `/` so it can be appended directly to a base URL
/// whose trailing slash has been removed.
static SPEC_PATHS: &[&str] = &[
    // Root-level Swagger documents.
    "/swagger.json",
    "/swagger.yaml",
    "/swagger/v1/swagger.json",
    "/swagger/v2/swagger.json",
    // Root-level OpenAPI documents.
    "/openapi.json",
    "/openapi.yaml",
    // `api-docs` style endpoints.
    "/api-docs",
    "/api-docs.json",
    "/api-docs.yaml",
    // Documents nested under `/api`.
    "/api/swagger.json",
    "/api/openapi.json",
    "/api/v1/swagger.json",
    "/api/v2/swagger.json",
    // Version-prefixed documents.
    "/v1/swagger.json",
    "/v2/swagger.json",
    "/v3/api-docs",
    "/v3/api-docs.yaml",
];
68
69impl<'a> SwaggerDiscovery<'a> {
70 pub fn new(client: &'a HttpClient, base_url: &'a str, host: &'a str) -> Self {
71 Self {
72 client,
73 base_url,
74 host,
75 }
76 }
77
78 pub async fn run(&self) -> (HashSet<String>, Vec<CapturedError>) {
79 let mut paths = HashSet::new();
80 let mut errors = Vec::new();
81
82 let base = self.base_url.trim_end_matches('/');
83
84 for spec_path in SPEC_PATHS {
85 let url = format!("{base}{spec_path}");
86
87 let resp = match self.client.get(&url).await {
88 Ok(r) if r.status < 400 => r,
89 Ok(_) => continue,
90 Err(e) => {
91 errors.push(e);
92 continue;
93 }
94 };
95
96 debug!("[swagger] found spec at {url}");
97 self.client.cache_spec(&url, &resp.body);
98 self.parse_spec(&resp.body, &mut paths, &mut errors);
99 }
100
101 debug!("[swagger] total paths extracted: {}", paths.len());
102 (paths, errors)
103 }
104
105 fn parse_spec(&self, body: &str, paths: &mut HashSet<String>, errors: &mut Vec<CapturedError>) {
108 if body.trim_start().starts_with('{') || body.trim_start().starts_with('[') {
110 self.parse_json(body, paths, errors);
111 } else {
112 self.parse_yaml(body, paths, errors);
113 }
114 }
115
116 fn parse_json(&self, body: &str, paths: &mut HashSet<String>, errors: &mut Vec<CapturedError>) {
119 let version_hint = body.contains("\"openapi\"");
121
122 if version_hint {
123 match serde_json::from_str::<OpenApiV3>(body) {
124 Ok(spec) => self.harvest_v3(spec, paths),
125 Err(e) => {
126 warn!("[swagger] OpenAPI v3 parse failed: {e}");
127 errors.push(CapturedError::parse("swagger/openapi-v3", e.to_string()));
128 self.fallback_regex(body, paths);
129 }
130 }
131 } else {
132 match serde_json::from_str::<SwaggerV2>(body) {
133 Ok(spec) => self.harvest_v2(spec, paths),
134 Err(e) => {
135 warn!("[swagger] Swagger v2 parse failed: {e}");
136 errors.push(CapturedError::parse("swagger/swagger-v2", e.to_string()));
137 self.fallback_regex(body, paths);
138 }
139 }
140 }
141 }
142
143 fn parse_yaml(&self, body: &str, paths: &mut HashSet<String>, errors: &mut Vec<CapturedError>) {
146 if body.contains("openapi:") {
148 match serde_yml::from_str::<OpenApiV3>(body) {
149 Ok(spec) => {
150 self.harvest_v3(spec, paths);
151 return;
152 }
153 Err(e) => {
154 warn!("[swagger] YAML OpenAPI v3 parse failed: {e}");
155 errors.push(CapturedError::parse("swagger/yaml-v3", e.to_string()));
156 }
157 }
158 }
159
160 match serde_yml::from_str::<SwaggerV2>(body) {
162 Ok(spec) => self.harvest_v2(spec, paths),
163 Err(e) => {
164 warn!("[swagger] YAML Swagger v2 parse failed: {e}");
165 errors.push(CapturedError::parse("swagger/yaml-v2", e.to_string()));
166 self.fallback_regex(body, paths);
167 }
168 }
169 }
170
171 fn harvest_v3(&self, spec: OpenApiV3, paths: &mut HashSet<String>) {
176 let server_bases: Vec<String> = spec
178 .servers
179 .unwrap_or_default()
180 .into_iter()
181 .filter_map(|s| {
182 let url = s.url;
183 if url.starts_with('/') {
185 return Some(format!("{}{}", self.base_url.trim_end_matches('/'), url));
186 }
187 Url::parse(&url)
189 .ok()
190 .filter(|u| u.host_str() == Some(self.host))
191 .map(|u| u.to_string())
192 })
193 .collect();
194
195 for raw_path in spec.paths.unwrap_or_default().into_keys() {
196 if server_bases.is_empty() {
199 if let Some(p) = normalize_path(&raw_path, self.host) {
200 paths.insert(p);
201 }
202 } else {
203 for base in &server_bases {
204 let full = format!(
205 "{}/{}",
206 base.trim_end_matches('/'),
207 raw_path.trim_start_matches('/')
208 );
209 if let Some(p) = normalize_path(&full, self.host) {
210 paths.insert(p);
211 }
212 }
213 }
214 }
215 }
216
217 fn harvest_v2(&self, spec: SwaggerV2, paths: &mut HashSet<String>) {
220 let server_base: Option<String> = spec.host.as_ref().and_then(|h| {
222 let canonical = h.split(':').next().unwrap_or(h);
224 if canonical != self.host {
225 return None;
226 }
227 let scheme = spec
228 .schemes
229 .as_deref()
230 .unwrap_or(&[])
231 .iter()
232 .find(|s| s.as_str() == "https" || s.as_str() == "http")
233 .map(|s| s.as_str())
234 .unwrap_or("https");
235
236 let bp = spec
237 .base_path
238 .as_deref()
239 .unwrap_or("")
240 .trim_end_matches('/');
241
242 Some(format!("{scheme}://{h}{bp}"))
243 });
244
245 for raw_path in spec.paths.unwrap_or_default().into_keys() {
246 let candidate = if let Some(ref base) = server_base {
247 format!(
248 "{}/{}",
249 base.trim_end_matches('/'),
250 raw_path.trim_start_matches('/')
251 )
252 } else {
253 let bp = spec
255 .base_path
256 .as_deref()
257 .unwrap_or("")
258 .trim_end_matches('/');
259 format!("{bp}{raw_path}")
260 };
261
262 if let Some(p) = normalize_path(&candidate, self.host) {
263 paths.insert(p);
264 }
265 }
266 }
267
268 fn fallback_regex(&self, body: &str, paths: &mut HashSet<String>) {
272 for cap in PATH_RE.captures_iter(body) {
273 let raw = &cap[1];
274 if let Some(p) = normalize_path(raw, self.host) {
275 paths.insert(p);
276 }
277 }
278 }
279}