// upstream_rs/providers/http/webscraper_adapter.rs

1use anyhow::{Result, anyhow, bail};
2use chrono::{DateTime, Datelike, Timelike, Utc};
3use std::path::Path;
4
5use crate::models::common::{Version, enums::Filetype};
6use crate::models::provider::{Asset, Release};
7use crate::providers::http::http_client::{ConditionalDiscoveryResult, HttpAssetInfo, HttpClient};
8use crate::providers::release_provider::ReleaseProvider;
9use crate::utils::filename_parser::parse_filetype;
10
11#[derive(Debug, Clone)]
12pub struct WebScraperAdapter {
13    client: HttpClient,
14}
15
16impl WebScraperAdapter {
17    fn parse_version_from_filename(filename: &str) -> Option<Version> {
18        Version::from_filename(filename).ok()
19    }
20
21    fn version_from_last_modified(dt: DateTime<Utc>) -> Version {
22        // Monotonic semver-like mapping for stable update comparisons.
23        let major = dt.year_ce().1;
24        let minor = dt.ordinal();
25        let patch = dt.num_seconds_from_midnight();
26        Version::new(major, minor, patch, false)
27    }
28
29    fn is_unversioned_download_asset(info: &HttpAssetInfo) -> bool {
30        if Self::parse_version_from_filename(&info.name).is_some() {
31            return false;
32        }
33
34        matches!(
35            parse_filetype(&info.name),
36            Filetype::AppImage
37                | Filetype::MacApp
38                | Filetype::MacDmg
39                | Filetype::Archive
40                | Filetype::Compressed
41                | Filetype::WinExe
42        )
43    }
44
45    fn select_infos_for_best_version(
46        infos: &[HttpAssetInfo],
47        best_version: Option<&Version>,
48    ) -> Vec<HttpAssetInfo> {
49        let Some(target_version) = best_version else {
50            return infos.to_vec();
51        };
52
53        let filtered: Vec<_> = infos
54            .iter()
55            .filter(|info| {
56                Self::parse_version_from_filename(&info.name)
57                    .map(|v| v.cmp(target_version).is_eq())
58                    .unwrap_or_else(|| Self::is_unversioned_download_asset(info))
59            })
60            .cloned()
61            .collect();
62
63        if filtered.is_empty() {
64            infos.to_vec()
65        } else {
66            filtered
67        }
68    }
69
70    pub fn new(client: HttpClient) -> Self {
71        Self { client }
72    }
73
74    pub async fn download_asset<F>(
75        &self,
76        asset: &Asset,
77        destination_path: &Path,
78        dl_callback: &mut Option<F>,
79    ) -> Result<()>
80    where
81        F: FnMut(u64, u64),
82    {
83        self.client
84            .download_file(&asset.download_url, destination_path, dl_callback)
85            .await
86    }
87
88    pub async fn get_release_by_tag(&self, _slug: &str, _tag: &str) -> Result<Release> {
89        bail!("HTTP provider does not support tagged releases")
90    }
91
92    pub async fn get_latest_release(&self, slug: &str) -> Result<Release> {
93        self.get_latest_release_if_modified_since(slug, None)
94            .await?
95            .ok_or_else(|| anyhow!("Unexpected not-modified response for scraper provider"))
96    }
97
98    pub async fn get_latest_release_if_modified_since(
99        &self,
100        slug: &str,
101        last_upgraded: Option<DateTime<Utc>>,
102    ) -> Result<Option<Release>> {
103        let discovery = self
104            .client
105            .discover_assets_if_modified_since(slug, last_upgraded)
106            .await?;
107        let mut infos = match discovery {
108            ConditionalDiscoveryResult::NotModified => return Ok(None),
109            ConditionalDiscoveryResult::Assets(infos) => infos,
110        };
111
112        let mut best_version: Option<Version> = None;
113        for info in &infos {
114            if let Some(version) = Self::parse_version_from_filename(&info.name) {
115                match &best_version {
116                    Some(prev) if prev.cmp(&version).is_ge() => {}
117                    _ => best_version = Some(version),
118                }
119            }
120        }
121
122        if best_version.is_none() {
123            let hydrate_limit = infos.len().min(24);
124            for info in infos.iter_mut().take(hydrate_limit) {
125                let url = info.download_url.clone();
126                if let Ok(probed) = self.client.probe_asset(&url).await {
127                    info.size = probed.size;
128                    if probed.last_modified.is_some() {
129                        info.last_modified = probed.last_modified;
130                    }
131                    if probed.etag.is_some() {
132                        info.etag = probed.etag;
133                    }
134                }
135            }
136        }
137
138        if best_version.is_none() {
139            for info in &infos {
140                if let Some(last_modified) = info.last_modified {
141                    let version = Self::version_from_last_modified(last_modified);
142                    match &best_version {
143                        Some(prev) if prev.cmp(&version).is_ge() => {}
144                        _ => best_version = Some(version),
145                    }
146                }
147            }
148        }
149
150        let selected_infos = Self::select_infos_for_best_version(&infos, best_version.as_ref());
151
152        let published_at = selected_infos
153            .iter()
154            .filter_map(|i| i.last_modified)
155            .max()
156            .unwrap_or_else(|| last_upgraded.unwrap_or_else(Utc::now));
157
158        let assets: Vec<Asset> = selected_infos
159            .iter()
160            .enumerate()
161            .map(|(idx, info)| {
162                Asset::new(
163                    info.download_url.clone(),
164                    (idx + 1) as u64,
165                    info.name.clone(),
166                    info.size,
167                    info.last_modified.unwrap_or(published_at),
168                )
169            })
170            .collect();
171
172        let version = best_version.unwrap_or_else(|| Version::new(0, 0, 0, false));
173        let release_name = if assets.len() == 1 {
174            let info = &selected_infos[0];
175            if let Some(etag) = &info.etag {
176                format!("{} [{}]", info.name, etag)
177            } else {
178                info.name.clone()
179            }
180        } else {
181            format!("Discovered {} assets", assets.len())
182        };
183        Ok(Some(Release {
184            id: 1,
185            tag: "direct".to_string(),
186            name: release_name,
187            body: "Discovered from HTTP source".to_string(),
188            is_draft: false,
189            is_prerelease: false,
190            assets,
191            version,
192            published_at,
193        }))
194    }
195
196    pub async fn get_releases(
197        &self,
198        slug: &str,
199        _per_page: Option<u32>,
200        _max_total: Option<u32>,
201    ) -> Result<Vec<Release>> {
202        Ok(vec![self.get_latest_release(slug).await?])
203    }
204}
205
206#[async_trait::async_trait(?Send)]
207impl ReleaseProvider for WebScraperAdapter {
208    async fn get_latest_release(&self, slug: &str) -> Result<Release> {
209        WebScraperAdapter::get_latest_release(self, slug).await
210    }
211
212    async fn get_releases(
213        &self,
214        slug: &str,
215        per_page: Option<u32>,
216        max_total: Option<u32>,
217    ) -> Result<Vec<Release>> {
218        WebScraperAdapter::get_releases(self, slug, per_page, max_total).await
219    }
220
221    async fn get_release_by_tag(&self, slug: &str, tag: &str) -> Result<Release> {
222        WebScraperAdapter::get_release_by_tag(self, slug, tag).await
223    }
224
225    async fn get_latest_release_if_modified_since(
226        &self,
227        slug: &str,
228        last_upgraded: Option<DateTime<Utc>>,
229    ) -> Result<Option<Release>> {
230        WebScraperAdapter::get_latest_release_if_modified_since(self, slug, last_upgraded).await
231    }
232
233    async fn download_asset(
234        &self,
235        asset: &Asset,
236        destination_path: &Path,
237        dl_callback: Option<&mut (dyn FnMut(u64, u64) + '_)>,
238    ) -> Result<()> {
239        let mut forwarded = dl_callback;
240        WebScraperAdapter::download_asset(self, asset, destination_path, &mut forwarded).await
241    }
242}
243
244#[cfg(test)]
245mod tests {
246    use super::{HttpAssetInfo, WebScraperAdapter};
247    use crate::models::common::Version;
248    use crate::providers::http::HttpClient;
249    use chrono::Utc;
250    use std::io::{BufRead, BufReader, Write};
251    use std::net::TcpListener;
252    use std::sync::mpsc;
253    use std::thread;
254
255    fn spawn_test_server<F>(max_requests: usize, handler: F) -> String
256    where
257        F: Fn(&str, &str) -> String + Send + 'static,
258    {
259        let (tx, rx) = mpsc::channel();
260        thread::spawn(move || {
261            let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
262            let addr = listener.local_addr().expect("resolve local addr");
263            tx.send(addr).expect("send test server addr");
264
265            for _ in 0..max_requests {
266                let (mut stream, _) = listener.accept().expect("accept request");
267                let cloned = stream.try_clone().expect("clone stream");
268                let mut reader = BufReader::new(cloned);
269
270                let mut request_line = String::new();
271                reader
272                    .read_line(&mut request_line)
273                    .expect("read request line");
274                let mut parts = request_line.split_whitespace();
275                let method = parts.next().unwrap_or("");
276                let path = parts.next().unwrap_or("/");
277
278                let mut line = String::new();
279                loop {
280                    line.clear();
281                    reader.read_line(&mut line).expect("read request headers");
282                    if line == "\r\n" || line.is_empty() {
283                        break;
284                    }
285                }
286
287                let response = handler(method, path);
288                stream
289                    .write_all(response.as_bytes())
290                    .expect("write response");
291                stream.flush().expect("flush response");
292            }
293        });
294
295        let addr = rx.recv().expect("receive server address");
296        format!("http://{}", addr)
297    }
298
299    fn http_response(status_line: &str, headers: &[(&str, &str)], body: &str) -> String {
300        let mut out = format!("{status_line}\r\n");
301        for (k, v) in headers {
302            out.push_str(&format!("{k}: {v}\r\n"));
303        }
304        out.push_str("\r\n");
305        out.push_str(body);
306        out
307    }
308
309    fn fixture_response(body: &'static str) -> String {
310        http_response(
311            "HTTP/1.1 200 OK",
312            &[
313                ("Connection", "close"),
314                ("Content-Type", "text/html"),
315                ("Content-Length", &body.len().to_string()),
316            ],
317            body,
318        )
319    }
320
321    fn asset_names(release: &crate::models::provider::Release) -> Vec<&str> {
322        release
323            .assets
324            .iter()
325            .map(|asset| asset.name.as_str())
326            .collect()
327    }
328
329    #[test]
330    fn parse_version_from_filename_extracts_semver_triplet() {
331        let version = WebScraperAdapter::parse_version_from_filename("tool-v1.4.9-linux.tar.gz")
332            .expect("parsed version");
333        assert_eq!(version.major, 1);
334        assert_eq!(version.minor, 4);
335        assert_eq!(version.patch, 9);
336    }
337
338    fn test_asset(name: &str) -> HttpAssetInfo {
339        HttpAssetInfo {
340            download_url: format!("https://example.invalid/{name}"),
341            name: name.to_string(),
342            size: 0,
343            last_modified: None,
344            etag: None,
345        }
346    }
347
348    #[test]
349    fn version_filter_keeps_unversioned_download_assets() {
350        let infos = vec![
351            test_asset("ffmpeg-release-essentials.7z"),
352            test_asset("ffmpeg-release-essentials.zip"),
353            test_asset("ffmpeg-release-github"),
354            test_asset("ffmpeg-release-essentials.7z.ver"),
355            test_asset("ffmpeg-8.0.1-essentials_build.7z"),
356            test_asset("ffmpeg-8.0.1-full_build.7z"),
357            test_asset("ffmpeg-7.1.1-full_build.7z"),
358        ];
359
360        let selected = WebScraperAdapter::select_infos_for_best_version(
361            &infos,
362            Some(&Version::new(8, 0, 1, false)),
363        );
364        let names: Vec<_> = selected.iter().map(|info| info.name.as_str()).collect();
365
366        assert!(names.contains(&"ffmpeg-release-essentials.7z"));
367        assert!(names.contains(&"ffmpeg-release-essentials.zip"));
368        assert!(names.contains(&"ffmpeg-8.0.1-essentials_build.7z"));
369        assert!(names.contains(&"ffmpeg-8.0.1-full_build.7z"));
370        assert!(!names.contains(&"ffmpeg-release-github"));
371        assert!(!names.contains(&"ffmpeg-release-essentials.7z.ver"));
372        assert!(!names.contains(&"ffmpeg-7.1.1-full_build.7z"));
373    }
374
375    #[tokio::test]
376    async fn get_latest_release_selects_assets_for_latest_detected_version() {
377        let html = r#"
378                <html><body>
379                    <a href="/tool-v1.9.0-linux.tar.gz">old</a>
380                    <a href="/tool-v1.10.0-linux.tar.gz">new</a>
381                    <a href="/tool-v1.10.0-linux.sha256">checksum</a>
382                </body></html>
383            "#
384        .to_string();
385        let html_len = html.len().to_string();
386        let html_for_server = html.clone();
387        let server = spawn_test_server(1, move |method, _| {
388            assert_eq!(method, "GET");
389            http_response(
390                "HTTP/1.1 200 OK",
391                &[
392                    ("Connection", "close"),
393                    ("Content-Type", "text/html"),
394                    ("Content-Length", &html_len),
395                ],
396                &html_for_server,
397            )
398        });
399
400        let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
401        let release = adapter
402            .get_latest_release(&server)
403            .await
404            .expect("latest release");
405
406        assert_eq!(release.version.major, 1);
407        assert_eq!(release.version.minor, 10);
408        assert_eq!(release.version.patch, 0);
409        assert_eq!(release.assets.len(), 1);
410        assert!(release.assets[0].name.contains("1.10.0"));
411    }
412
413    #[tokio::test]
414    async fn fixture_ffmpeg_builds_page_keeps_latest_release_downloads() {
415        let html = include_str!("../../../tests/fixtures/providers/http/ffmpeg.html");
416        let server = spawn_test_server(1, move |method, _| {
417            assert_eq!(method, "GET");
418            fixture_response(html)
419        });
420
421        let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
422        let release = adapter
423            .get_latest_release(&server)
424            .await
425            .expect("latest release");
426        let names = asset_names(&release);
427
428        assert_eq!(release.version, Version::new(8, 0, 1, false));
429        assert!(names.contains(&"ffmpeg-release-essentials.7z"));
430        assert!(names.contains(&"ffmpeg-release-essentials.zip"));
431        assert!(names.contains(&"ffmpeg-release-full.7z"));
432        assert!(names.contains(&"ffmpeg-release-full-shared.7z"));
433        assert!(names.contains(&"ffmpeg-8.0.1-essentials_build.7z"));
434        assert!(names.contains(&"ffmpeg-8.0.1-full_build.7z"));
435        assert!(names.iter().all(|name| !name.ends_with(".sha256")));
436        assert!(names.iter().all(|name| !name.ends_with(".ver")));
437        assert!(!names.contains(&"ffmpeg-release-github"));
438    }
439
440    #[tokio::test]
441    async fn fixture_zig_builds_page_selects_current_build_assets() {
442        let html = include_str!("../../../tests/fixtures/providers/http/zig.html");
443        let server = spawn_test_server(1, move |method, _| {
444            assert_eq!(method, "GET");
445            fixture_response(html)
446        });
447
448        let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
449        let release = adapter
450            .get_latest_release(&server)
451            .await
452            .expect("latest release");
453        let names = asset_names(&release);
454
455        assert_eq!(release.version, Version::new(0, 17, 0, false));
456        assert!(names.contains(&"zig-0.17.0-dev.813+2153f8143.tar.xz"));
457        assert!(names.contains(&"zig-bootstrap-0.17.0-dev.813+2153f8143.tar.xz"));
458        assert!(names.contains(&"zig-x86_64-linux-0.17.0-dev.813+2153f8143.tar.xz"));
459        assert!(names.contains(&"zig-x86_64-windows-0.17.0-dev.813+2153f8143.zip"));
460        assert!(names.iter().all(|name| !name.ends_with(".minisig")));
461    }
462
463    #[tokio::test]
464    async fn get_latest_release_uses_html_last_modified_for_unversioned_links() {
465        let html = r#"
466                <html><body>
467                    <a href="/tool-release.zip">download</a>
468                </body></html>
469            "#
470        .to_string();
471        let html_len = html.len().to_string();
472        let html_for_server = html.clone();
473        let server = spawn_test_server(2, move |method, path| match (method, path) {
474            ("GET", "/") => http_response(
475                "HTTP/1.1 200 OK",
476                &[
477                    ("Connection", "close"),
478                    ("Content-Type", "text/html"),
479                    ("Last-Modified", "Tue, 10 Feb 2026 15:04:05 GMT"),
480                    ("Content-Length", &html_len),
481                ],
482                &html_for_server,
483            ),
484            ("HEAD", "/tool-release.zip") => http_response(
485                "HTTP/1.1 200 OK",
486                &[("Connection", "close"), ("Content-Length", "0")],
487                "",
488            ),
489            _ => panic!("unexpected request {method} {path}"),
490        });
491
492        let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
493        let release = adapter
494            .get_latest_release(&server)
495            .await
496            .expect("latest release");
497
498        assert_eq!(release.version, Version::new(2026, 41, 54245, false));
499        assert_eq!(release.published_at, release.assets[0].created_at);
500    }
501
502    #[tokio::test]
503    async fn conditional_latest_release_returns_none_on_not_modified() {
504        let server = spawn_test_server(1, move |method, _| {
505            assert_eq!(method, "GET");
506            http_response("HTTP/1.1 304 Not Modified", &[("Connection", "close")], "")
507        });
508        let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
509        let release = adapter
510            .get_latest_release_if_modified_since(&server, Some(Utc::now()))
511            .await
512            .expect("conditional release");
513        assert!(release.is_none());
514    }
515}