1use keyhog_core::{Chunk, ChunkMetadata, Source, SourceError};
33
/// Minimum string length handed to `extract_printable_strings` when pulling
/// printable strings out of a WASM binary.
const MIN_WASM_STRING_LEN: usize = 8;

/// Largest response body accepted, in bytes (10 MiB). `read_bytes_response`
/// rejects anything that declares or delivers more than this.
const MAX_RESPONSE_BYTES: usize = 10 * 1024 * 1024;

/// The four leading bytes (`\0asm`) a response must start with for
/// `handle_wasm` to process it.
const WASM_MAGIC: &[u8; 4] = b"\x00asm";
42
/// A [`Source`] backed by a list of URLs that are fetched over HTTP and
/// turned into [`Chunk`]s.
pub struct WebSource {
    /// URLs to fetch, visited in order by `fetch_all`.
    urls: Vec<String>,
}
52
53impl WebSource {
54 pub fn new(urls: Vec<String>) -> Self {
66 Self { urls }
67 }
68
69 pub fn from_url(url: &str) -> Self {
81 Self {
82 urls: vec![url.to_string()],
83 }
84 }
85
86 fn fetch_all(&self) -> Vec<Result<Chunk, SourceError>> {
91 let client = reqwest::blocking::Client::builder()
95 .timeout(crate::timeouts::HTTP_REQUEST)
96 .danger_accept_invalid_certs(false)
97 .redirect(reqwest::redirect::Policy::limited(5))
98 .user_agent("keyhog-web/0.1")
99 .no_gzip()
100 .no_brotli()
101 .no_deflate()
102 .build()
103 .map_err(|e| SourceError::Other(format!("failed to build HTTP client: {e}")));
104
105 let client = match client {
106 Ok(c) => c,
107 Err(e) => return vec![Err(e)],
108 };
109
110 let mut results = Vec::new();
111
112 for url in &self.urls {
113 let chunks = fetch_url(&client, url);
114 results.extend(chunks);
115 }
116
117 results
118 }
119}
120
121impl Source for WebSource {
122 fn name(&self) -> &str {
123 "web"
124 }
125
126 fn chunks(&self) -> Box<dyn Iterator<Item = Result<Chunk, SourceError>> + '_> {
127 Box::new(self.fetch_all().into_iter())
128 }
129 fn as_any(&self) -> &dyn std::any::Any {
130 self
131 }
132}
133
134fn fetch_url(client: &reqwest::blocking::Client, url: &str) -> Vec<Result<Chunk, SourceError>> {
136 let resp = match client.get(url).send() {
137 Ok(r) => r,
138 Err(e) => {
139 return vec![Err(SourceError::Other(format!(
140 "failed to fetch {url}: {e}"
141 )))];
142 }
143 };
144
145 let status = resp.status().as_u16();
146 if status != 200 {
147 tracing::warn!(url, status, "non-200 response, skipping");
148 return Vec::new();
149 }
150
151 let lower = url.to_lowercase();
153 if lower.ends_with(".wasm") {
154 handle_wasm(resp, url)
155 } else if lower.ends_with(".map") || lower.contains(".map?") {
156 handle_sourcemap(resp, url)
157 } else {
158 handle_js(resp, url)
159 }
160}
161
162fn handle_js(resp: reqwest::blocking::Response, url: &str) -> Vec<Result<Chunk, SourceError>> {
164 match read_text_response(resp) {
165 Ok(body) => vec![Ok(Chunk {
166 data: body.into(),
167 metadata: ChunkMetadata {
168 base_offset: 0,
169 source_type: "web:js".to_string(),
170 path: Some(url.to_string()),
171 commit: None,
172 author: None,
173 date: None,
174 mtime_ns: None,
175 size_bytes: None,
176},
177 })],
178 Err(e) => vec![Err(e)],
179 }
180}
181
182fn handle_sourcemap(
185 resp: reqwest::blocking::Response,
186 url: &str,
187) -> Vec<Result<Chunk, SourceError>> {
188 let body = match read_text_response(resp) {
189 Ok(b) => b,
190 Err(e) => return vec![Err(e)],
191 };
192
193 let map: serde_json::Value = match serde_json::from_str(&body) {
194 Ok(v) => v,
195 Err(e) => {
196 tracing::warn!(url, err = %e, "failed to parse source map JSON");
197 return vec![Ok(Chunk {
199 data: body.into(),
200 metadata: ChunkMetadata {
201 base_offset: 0,
202 source_type: "web:sourcemap:raw".to_string(),
203 path: Some(url.to_string()),
204 commit: None,
205 author: None,
206 date: None,
207 mtime_ns: None,
208 size_bytes: None,
209},
210 })];
211 }
212 };
213
214 let sources: Vec<String> = map["sources"]
215 .as_array()
216 .unwrap_or(&vec![])
217 .iter()
218 .filter_map(|v| v.as_str().map(String::from))
219 .collect();
220
221 let contents: Vec<Option<String>> = map["sourcesContent"]
222 .as_array()
223 .map(|arr| arr.iter().map(|v| v.as_str().map(String::from)).collect())
224 .unwrap_or_default();
225
226 let mut chunks = Vec::new();
227
228 for (i, content) in contents.iter().enumerate() {
229 if let Some(code) = content {
230 if code.is_empty() {
231 continue;
232 }
233 let source_name = sources
234 .get(i)
235 .cloned()
236 .unwrap_or_else(|| format!("source_{i}"));
237 chunks.push(Ok(Chunk {
238 data: code.clone().into(),
239 metadata: ChunkMetadata {
240 base_offset: 0,
241 source_type: "web:sourcemap".to_string(),
242 path: Some(format!("{url}!{source_name}")),
243 commit: None,
244 author: None,
245 date: None,
246 mtime_ns: None,
247 size_bytes: None,
248 },
249 }));
250 }
251 }
252
253 if chunks.is_empty() {
255 chunks.push(Ok(Chunk {
256 data: body.into(),
257 metadata: ChunkMetadata {
258 base_offset: 0,
259 source_type: "web:sourcemap:raw".to_string(),
260 path: Some(url.to_string()),
261 commit: None,
262 author: None,
263 date: None,
264 mtime_ns: None,
265 size_bytes: None,
266},
267 }));
268 }
269
270 chunks
271}
272
273fn handle_wasm(resp: reqwest::blocking::Response, url: &str) -> Vec<Result<Chunk, SourceError>> {
275 let bytes = match read_bytes_response(resp) {
276 Ok(b) => b,
277 Err(e) => return vec![Err(e)],
278 };
279
280 if bytes.len() < 4 || &bytes[..4] != WASM_MAGIC {
282 tracing::warn!(url, "not a valid WASM file (wrong magic bytes)");
283 return Vec::new();
284 }
285
286 let strings = crate::strings::extract_printable_strings(&bytes, MIN_WASM_STRING_LEN);
287 if strings.is_empty() {
288 return Vec::new();
289 }
290
291 vec![Ok(Chunk {
292 data: keyhog_core::SensitiveString::join(&strings, "\n"),
293 metadata: ChunkMetadata {
294 base_offset: 0,
295 source_type: "web:wasm".to_string(),
296 path: Some(url.to_string()),
297 commit: None,
298 author: None,
299 date: None,
300 mtime_ns: None,
301 size_bytes: None,
302},
303 })]
304}
305
306fn read_text_response(resp: reqwest::blocking::Response) -> Result<String, SourceError> {
314 let bytes = read_bytes_response(resp)?;
315 String::from_utf8(bytes).map_err(|e| SourceError::Other(format!("non-UTF-8 response: {e}")))
316}
317
318fn read_bytes_response(resp: reqwest::blocking::Response) -> Result<Vec<u8>, SourceError> {
321 use std::io::Read;
322 let url = resp.url().to_string();
323
324 if let Some(len) = resp.content_length() {
325 if len as usize > MAX_RESPONSE_BYTES {
326 return Err(SourceError::Other(format!(
327 "response from {url} declares {len} bytes (> {} MB limit)",
328 MAX_RESPONSE_BYTES / (1024 * 1024)
329 )));
330 }
331 }
332
333 let mut buf = Vec::with_capacity(MAX_RESPONSE_BYTES.min(64 * 1024));
335 let mut taken = resp.take(MAX_RESPONSE_BYTES as u64 + 1);
336 taken
337 .read_to_end(&mut buf)
338 .map_err(|e| SourceError::Other(format!("failed to read bytes from {url}: {e}")))?;
339 if buf.len() > MAX_RESPONSE_BYTES {
340 return Err(SourceError::Other(format!(
341 "response from {url} exceeds {} MB limit",
342 MAX_RESPONSE_BYTES / (1024 * 1024)
343 )));
344 }
345
346 Ok(buf)
347}