1use anyhow::{Result, anyhow, bail};
2use chrono::{DateTime, Datelike, Timelike, Utc};
3use std::path::Path;
4
5use crate::models::common::{Version, enums::Filetype};
6use crate::models::provider::{Asset, Release};
7use crate::providers::http::http_client::{ConditionalDiscoveryResult, HttpAssetInfo, HttpClient};
8use crate::providers::release_provider::ReleaseProvider;
9use crate::utils::filename_parser::parse_filetype;
10
11#[derive(Debug, Clone)]
12pub struct WebScraperAdapter {
13 client: HttpClient,
14}
15
16impl WebScraperAdapter {
17 fn parse_version_from_filename(filename: &str) -> Option<Version> {
18 Version::from_filename(filename).ok()
19 }
20
21 fn version_from_last_modified(dt: DateTime<Utc>) -> Version {
22 let major = dt.year_ce().1;
24 let minor = dt.ordinal();
25 let patch = dt.num_seconds_from_midnight();
26 Version::new(major, minor, patch, false)
27 }
28
29 fn is_unversioned_download_asset(info: &HttpAssetInfo) -> bool {
30 if Self::parse_version_from_filename(&info.name).is_some() {
31 return false;
32 }
33
34 matches!(
35 parse_filetype(&info.name),
36 Filetype::AppImage
37 | Filetype::MacApp
38 | Filetype::MacDmg
39 | Filetype::Archive
40 | Filetype::Compressed
41 | Filetype::WinExe
42 )
43 }
44
45 fn select_infos_for_best_version(
46 infos: &[HttpAssetInfo],
47 best_version: Option<&Version>,
48 ) -> Vec<HttpAssetInfo> {
49 let Some(target_version) = best_version else {
50 return infos.to_vec();
51 };
52
53 let filtered: Vec<_> = infos
54 .iter()
55 .filter(|info| {
56 Self::parse_version_from_filename(&info.name)
57 .map(|v| v.cmp(target_version).is_eq())
58 .unwrap_or_else(|| Self::is_unversioned_download_asset(info))
59 })
60 .cloned()
61 .collect();
62
63 if filtered.is_empty() {
64 infos.to_vec()
65 } else {
66 filtered
67 }
68 }
69
70 pub fn new(client: HttpClient) -> Self {
71 Self { client }
72 }
73
74 pub async fn download_asset<F>(
75 &self,
76 asset: &Asset,
77 destination_path: &Path,
78 dl_callback: &mut Option<F>,
79 ) -> Result<()>
80 where
81 F: FnMut(u64, u64),
82 {
83 self.client
84 .download_file(&asset.download_url, destination_path, dl_callback)
85 .await
86 }
87
88 pub async fn get_release_by_tag(&self, _slug: &str, _tag: &str) -> Result<Release> {
89 bail!("HTTP provider does not support tagged releases")
90 }
91
92 pub async fn get_latest_release(&self, slug: &str) -> Result<Release> {
93 self.get_latest_release_if_modified_since(slug, None)
94 .await?
95 .ok_or_else(|| anyhow!("Unexpected not-modified response for scraper provider"))
96 }
97
98 pub async fn get_latest_release_if_modified_since(
99 &self,
100 slug: &str,
101 last_upgraded: Option<DateTime<Utc>>,
102 ) -> Result<Option<Release>> {
103 let discovery = self
104 .client
105 .discover_assets_if_modified_since(slug, last_upgraded)
106 .await?;
107 let mut infos = match discovery {
108 ConditionalDiscoveryResult::NotModified => return Ok(None),
109 ConditionalDiscoveryResult::Assets(infos) => infos,
110 };
111
112 let mut best_version: Option<Version> = None;
113 for info in &infos {
114 if let Some(version) = Self::parse_version_from_filename(&info.name) {
115 match &best_version {
116 Some(prev) if prev.cmp(&version).is_ge() => {}
117 _ => best_version = Some(version),
118 }
119 }
120 }
121
122 if best_version.is_none() {
123 let hydrate_limit = infos.len().min(24);
124 for info in infos.iter_mut().take(hydrate_limit) {
125 let url = info.download_url.clone();
126 if let Ok(probed) = self.client.probe_asset(&url).await {
127 info.size = probed.size;
128 if probed.last_modified.is_some() {
129 info.last_modified = probed.last_modified;
130 }
131 if probed.etag.is_some() {
132 info.etag = probed.etag;
133 }
134 }
135 }
136 }
137
138 if best_version.is_none() {
139 for info in &infos {
140 if let Some(last_modified) = info.last_modified {
141 let version = Self::version_from_last_modified(last_modified);
142 match &best_version {
143 Some(prev) if prev.cmp(&version).is_ge() => {}
144 _ => best_version = Some(version),
145 }
146 }
147 }
148 }
149
150 let selected_infos = Self::select_infos_for_best_version(&infos, best_version.as_ref());
151
152 let published_at = selected_infos
153 .iter()
154 .filter_map(|i| i.last_modified)
155 .max()
156 .unwrap_or_else(|| last_upgraded.unwrap_or_else(Utc::now));
157
158 let assets: Vec<Asset> = selected_infos
159 .iter()
160 .enumerate()
161 .map(|(idx, info)| {
162 Asset::new(
163 info.download_url.clone(),
164 (idx + 1) as u64,
165 info.name.clone(),
166 info.size,
167 info.last_modified.unwrap_or(published_at),
168 )
169 })
170 .collect();
171
172 let version = best_version.unwrap_or_else(|| Version::new(0, 0, 0, false));
173 let release_name = if assets.len() == 1 {
174 let info = &selected_infos[0];
175 if let Some(etag) = &info.etag {
176 format!("{} [{}]", info.name, etag)
177 } else {
178 info.name.clone()
179 }
180 } else {
181 format!("Discovered {} assets", assets.len())
182 };
183 Ok(Some(Release {
184 id: 1,
185 tag: "direct".to_string(),
186 name: release_name,
187 body: "Discovered from HTTP source".to_string(),
188 is_draft: false,
189 is_prerelease: false,
190 assets,
191 version,
192 published_at,
193 }))
194 }
195
196 pub async fn get_releases(
197 &self,
198 slug: &str,
199 _per_page: Option<u32>,
200 _max_total: Option<u32>,
201 ) -> Result<Vec<Release>> {
202 Ok(vec![self.get_latest_release(slug).await?])
203 }
204}
205
206#[async_trait::async_trait(?Send)]
207impl ReleaseProvider for WebScraperAdapter {
208 async fn get_latest_release(&self, slug: &str) -> Result<Release> {
209 WebScraperAdapter::get_latest_release(self, slug).await
210 }
211
212 async fn get_releases(
213 &self,
214 slug: &str,
215 per_page: Option<u32>,
216 max_total: Option<u32>,
217 ) -> Result<Vec<Release>> {
218 WebScraperAdapter::get_releases(self, slug, per_page, max_total).await
219 }
220
221 async fn get_release_by_tag(&self, slug: &str, tag: &str) -> Result<Release> {
222 WebScraperAdapter::get_release_by_tag(self, slug, tag).await
223 }
224
225 async fn get_latest_release_if_modified_since(
226 &self,
227 slug: &str,
228 last_upgraded: Option<DateTime<Utc>>,
229 ) -> Result<Option<Release>> {
230 WebScraperAdapter::get_latest_release_if_modified_since(self, slug, last_upgraded).await
231 }
232
233 async fn download_asset(
234 &self,
235 asset: &Asset,
236 destination_path: &Path,
237 dl_callback: Option<&mut (dyn FnMut(u64, u64) + '_)>,
238 ) -> Result<()> {
239 let mut forwarded = dl_callback;
240 WebScraperAdapter::download_asset(self, asset, destination_path, &mut forwarded).await
241 }
242}
243
244#[cfg(test)]
245mod tests {
246 use super::{HttpAssetInfo, WebScraperAdapter};
247 use crate::models::common::Version;
248 use crate::providers::http::HttpClient;
249 use chrono::Utc;
250 use std::io::{BufRead, BufReader, Write};
251 use std::net::TcpListener;
252 use std::sync::mpsc;
253 use std::thread;
254
/// Starts a throwaway HTTP server on a background thread and returns its
/// base URL (`http://<addr>`).
///
/// The server binds an ephemeral port on 127.0.0.1, serves exactly
/// `max_requests` connections one after another, and for each one passes the
/// request method and path to `handler`, writing back whatever string it
/// returns. Request headers are read and discarded.
fn spawn_test_server<F>(max_requests: usize, handler: F) -> String
where
    F: Fn(&str, &str) -> String + Send + 'static,
{
    let (addr_tx, addr_rx) = mpsc::channel();

    thread::spawn(move || {
        let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
        let bound = listener.local_addr().expect("resolve local addr");
        addr_tx.send(bound).expect("send test server addr");

        for _ in 0..max_requests {
            let (mut stream, _) = listener.accept().expect("accept request");
            let mut reader = BufReader::new(stream.try_clone().expect("clone stream"));

            // First line: "<METHOD> <PATH> <HTTP-VERSION>".
            let mut request_line = String::new();
            reader
                .read_line(&mut request_line)
                .expect("read request line");
            let mut tokens = request_line.split_whitespace();
            let method = tokens.next().unwrap_or("");
            let path = tokens.next().unwrap_or("/");

            // Skip headers up to the blank separator line (or end of stream).
            let mut header = String::new();
            loop {
                header.clear();
                let read = reader.read_line(&mut header).expect("read request headers");
                if read == 0 || header == "\r\n" {
                    break;
                }
            }

            let reply = handler(method, path);
            stream.write_all(reply.as_bytes()).expect("write response");
            stream.flush().expect("flush response");
        }
    });

    let addr = addr_rx.recv().expect("receive server address");
    format!("http://{}", addr)
}
298
/// Renders a raw HTTP response: the status line, one `name: value` line per
/// header, a blank line, then the body. Lines are terminated with CRLF.
fn http_response(status_line: &str, headers: &[(&str, &str)], body: &str) -> String {
    let header_block: String = headers
        .iter()
        .map(|(name, value)| format!("{name}: {value}\r\n"))
        .collect();
    format!("{status_line}\r\n{header_block}\r\n{body}")
}
308
309 fn fixture_response(body: &'static str) -> String {
310 http_response(
311 "HTTP/1.1 200 OK",
312 &[
313 ("Connection", "close"),
314 ("Content-Type", "text/html"),
315 ("Content-Length", &body.len().to_string()),
316 ],
317 body,
318 )
319 }
320
321 fn asset_names(release: &crate::models::provider::Release) -> Vec<&str> {
322 release
323 .assets
324 .iter()
325 .map(|asset| asset.name.as_str())
326 .collect()
327 }
328
// A `vMAJOR.MINOR.PATCH` triplet embedded in a filename is recovered.
#[test]
fn parse_version_from_filename_extracts_semver_triplet() {
    let parsed = WebScraperAdapter::parse_version_from_filename("tool-v1.4.9-linux.tar.gz")
        .expect("parsed version");

    assert_eq!((parsed.major, parsed.minor, parsed.patch), (1, 4, 9));
}
337
338 fn test_asset(name: &str) -> HttpAssetInfo {
339 HttpAssetInfo {
340 download_url: format!("https://example.invalid/{name}"),
341 name: name.to_string(),
342 size: 0,
343 last_modified: None,
344 etag: None,
345 }
346 }
347
// Selecting for 8.0.1 keeps matching versioned files and unversioned
// downloads, and drops other versions and non-download files.
#[test]
fn version_filter_keeps_unversioned_download_assets() {
    let listing = [
        "ffmpeg-release-essentials.7z",
        "ffmpeg-release-essentials.zip",
        "ffmpeg-release-github",
        "ffmpeg-release-essentials.7z.ver",
        "ffmpeg-8.0.1-essentials_build.7z",
        "ffmpeg-8.0.1-full_build.7z",
        "ffmpeg-7.1.1-full_build.7z",
    ];
    let infos: Vec<HttpAssetInfo> = listing.iter().copied().map(test_asset).collect();

    let target = Version::new(8, 0, 1, false);
    let selected = WebScraperAdapter::select_infos_for_best_version(&infos, Some(&target));
    let names: Vec<&str> = selected.iter().map(|info| info.name.as_str()).collect();

    for kept in [
        "ffmpeg-release-essentials.7z",
        "ffmpeg-release-essentials.zip",
        "ffmpeg-8.0.1-essentials_build.7z",
        "ffmpeg-8.0.1-full_build.7z",
    ] {
        assert!(names.contains(&kept));
    }

    for dropped in [
        "ffmpeg-release-github",
        "ffmpeg-release-essentials.7z.ver",
        "ffmpeg-7.1.1-full_build.7z",
    ] {
        assert!(!names.contains(&dropped));
    }
}
374
375 #[tokio::test]
376 async fn get_latest_release_selects_assets_for_latest_detected_version() {
377 let html = r#"
378 <html><body>
379 <a href="/tool-v1.9.0-linux.tar.gz">old</a>
380 <a href="/tool-v1.10.0-linux.tar.gz">new</a>
381 <a href="/tool-v1.10.0-linux.sha256">checksum</a>
382 </body></html>
383 "#
384 .to_string();
385 let html_len = html.len().to_string();
386 let html_for_server = html.clone();
387 let server = spawn_test_server(1, move |method, _| {
388 assert_eq!(method, "GET");
389 http_response(
390 "HTTP/1.1 200 OK",
391 &[
392 ("Connection", "close"),
393 ("Content-Type", "text/html"),
394 ("Content-Length", &html_len),
395 ],
396 &html_for_server,
397 )
398 });
399
400 let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
401 let release = adapter
402 .get_latest_release(&server)
403 .await
404 .expect("latest release");
405
406 assert_eq!(release.version.major, 1);
407 assert_eq!(release.version.minor, 10);
408 assert_eq!(release.version.patch, 0);
409 assert_eq!(release.assets.len(), 1);
410 assert!(release.assets[0].name.contains("1.10.0"));
411 }
412
413 #[tokio::test]
414 async fn fixture_ffmpeg_builds_page_keeps_latest_release_downloads() {
415 let html = include_str!("../../../tests/fixtures/providers/http/ffmpeg.html");
416 let server = spawn_test_server(1, move |method, _| {
417 assert_eq!(method, "GET");
418 fixture_response(html)
419 });
420
421 let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
422 let release = adapter
423 .get_latest_release(&server)
424 .await
425 .expect("latest release");
426 let names = asset_names(&release);
427
428 assert_eq!(release.version, Version::new(8, 0, 1, false));
429 assert!(names.contains(&"ffmpeg-release-essentials.7z"));
430 assert!(names.contains(&"ffmpeg-release-essentials.zip"));
431 assert!(names.contains(&"ffmpeg-release-full.7z"));
432 assert!(names.contains(&"ffmpeg-release-full-shared.7z"));
433 assert!(names.contains(&"ffmpeg-8.0.1-essentials_build.7z"));
434 assert!(names.contains(&"ffmpeg-8.0.1-full_build.7z"));
435 assert!(names.iter().all(|name| !name.ends_with(".sha256")));
436 assert!(names.iter().all(|name| !name.ends_with(".ver")));
437 assert!(!names.contains(&"ffmpeg-release-github"));
438 }
439
440 #[tokio::test]
441 async fn fixture_zig_builds_page_selects_current_build_assets() {
442 let html = include_str!("../../../tests/fixtures/providers/http/zig.html");
443 let server = spawn_test_server(1, move |method, _| {
444 assert_eq!(method, "GET");
445 fixture_response(html)
446 });
447
448 let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
449 let release = adapter
450 .get_latest_release(&server)
451 .await
452 .expect("latest release");
453 let names = asset_names(&release);
454
455 assert_eq!(release.version, Version::new(0, 17, 0, false));
456 assert!(names.contains(&"zig-0.17.0-dev.813+2153f8143.tar.xz"));
457 assert!(names.contains(&"zig-bootstrap-0.17.0-dev.813+2153f8143.tar.xz"));
458 assert!(names.contains(&"zig-x86_64-linux-0.17.0-dev.813+2153f8143.tar.xz"));
459 assert!(names.contains(&"zig-x86_64-windows-0.17.0-dev.813+2153f8143.zip"));
460 assert!(names.iter().all(|name| !name.ends_with(".minisig")));
461 }
462
463 #[tokio::test]
464 async fn get_latest_release_uses_html_last_modified_for_unversioned_links() {
465 let html = r#"
466 <html><body>
467 <a href="/tool-release.zip">download</a>
468 </body></html>
469 "#
470 .to_string();
471 let html_len = html.len().to_string();
472 let html_for_server = html.clone();
473 let server = spawn_test_server(2, move |method, path| match (method, path) {
474 ("GET", "/") => http_response(
475 "HTTP/1.1 200 OK",
476 &[
477 ("Connection", "close"),
478 ("Content-Type", "text/html"),
479 ("Last-Modified", "Tue, 10 Feb 2026 15:04:05 GMT"),
480 ("Content-Length", &html_len),
481 ],
482 &html_for_server,
483 ),
484 ("HEAD", "/tool-release.zip") => http_response(
485 "HTTP/1.1 200 OK",
486 &[("Connection", "close"), ("Content-Length", "0")],
487 "",
488 ),
489 _ => panic!("unexpected request {method} {path}"),
490 });
491
492 let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
493 let release = adapter
494 .get_latest_release(&server)
495 .await
496 .expect("latest release");
497
498 assert_eq!(release.version, Version::new(2026, 41, 54245, false));
499 assert_eq!(release.published_at, release.assets[0].created_at);
500 }
501
502 #[tokio::test]
503 async fn conditional_latest_release_returns_none_on_not_modified() {
504 let server = spawn_test_server(1, move |method, _| {
505 assert_eq!(method, "GET");
506 http_response("HTTP/1.1 304 Not Modified", &[("Connection", "close")], "")
507 });
508 let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
509 let release = adapter
510 .get_latest_release_if_modified_since(&server, Some(Utc::now()))
511 .await
512 .expect("conditional release");
513 assert!(release.is_none());
514 }
515}