Skip to main content

// upstream_rs/providers/http/webscraper_adapter.rs

1use anyhow::{Result, anyhow, bail};
2use chrono::{DateTime, Datelike, Timelike, Utc};
3use std::path::Path;
4
5use crate::models::common::Version;
6use crate::models::provider::{Asset, Release};
7use crate::providers::http::http_client::{ConditionalDiscoveryResult, HttpClient};
8
9#[derive(Debug, Clone)]
10pub struct WebScraperAdapter {
11    client: HttpClient,
12}
13
14impl WebScraperAdapter {
15    fn parse_version_from_filename(filename: &str) -> Option<Version> {
16        Version::from_filename(filename).ok()
17    }
18
19    fn version_from_last_modified(dt: DateTime<Utc>) -> Version {
20        // Monotonic semver-like mapping for stable update comparisons.
21        let major = dt.year_ce().1;
22        let minor = dt.ordinal();
23        let patch = dt.num_seconds_from_midnight();
24        Version::new(major, minor, patch, false)
25    }
26
27    pub fn new(client: HttpClient) -> Self {
28        Self { client }
29    }
30
31    pub async fn download_asset<F>(
32        &self,
33        asset: &Asset,
34        destination_path: &Path,
35        dl_callback: &mut Option<F>,
36    ) -> Result<()>
37    where
38        F: FnMut(u64, u64),
39    {
40        self.client
41            .download_file(&asset.download_url, destination_path, dl_callback)
42            .await
43    }
44
45    pub async fn get_release_by_tag(&self, _slug: &str, _tag: &str) -> Result<Release> {
46        bail!("HTTP provider does not support tagged releases")
47    }
48
49    pub async fn get_latest_release(&self, slug: &str) -> Result<Release> {
50        self.get_latest_release_if_modified_since(slug, None)
51            .await?
52            .ok_or_else(|| anyhow!("Unexpected not-modified response for scraper provider"))
53    }
54
55    pub async fn get_latest_release_if_modified_since(
56        &self,
57        slug: &str,
58        last_upgraded: Option<DateTime<Utc>>,
59    ) -> Result<Option<Release>> {
60        let discovery = self
61            .client
62            .discover_assets_if_modified_since(slug, last_upgraded)
63            .await?;
64        let mut infos = match discovery {
65            ConditionalDiscoveryResult::NotModified => return Ok(None),
66            ConditionalDiscoveryResult::Assets(infos) => infos,
67        };
68
69        let mut best_version: Option<Version> = None;
70        for info in &infos {
71            if let Some(version) = Self::parse_version_from_filename(&info.name) {
72                match &best_version {
73                    Some(prev) if prev.cmp(&version).is_ge() => {}
74                    _ => best_version = Some(version),
75                }
76            }
77        }
78
79        if best_version.is_none() {
80            let hydrate_limit = infos.len().min(24);
81            for info in infos.iter_mut().take(hydrate_limit) {
82                let url = info.download_url.clone();
83                if let Ok(probed) = self.client.probe_asset(&url).await {
84                    info.size = probed.size;
85                    info.last_modified = probed.last_modified;
86                    info.etag = probed.etag;
87                }
88            }
89        }
90
91        if best_version.is_none() {
92            for info in &infos {
93                if let Some(last_modified) = info.last_modified {
94                    let version = Self::version_from_last_modified(last_modified);
95                    match &best_version {
96                        Some(prev) if prev.cmp(&version).is_ge() => {}
97                        _ => best_version = Some(version),
98                    }
99                }
100            }
101        }
102
103        let selected_infos = if let Some(target_version) = &best_version {
104            let filtered: Vec<_> = infos
105                .iter()
106                .filter(|info| {
107                    Self::parse_version_from_filename(&info.name)
108                        .map(|v| v.cmp(target_version).is_eq())
109                        .unwrap_or(false)
110                })
111                .cloned()
112                .collect();
113            if filtered.is_empty() { infos } else { filtered }
114        } else {
115            infos
116        };
117
118        let published_at = selected_infos
119            .iter()
120            .filter_map(|i| i.last_modified)
121            .max()
122            .unwrap_or_else(Utc::now);
123
124        let assets: Vec<Asset> = selected_infos
125            .iter()
126            .enumerate()
127            .map(|(idx, info)| {
128                Asset::new(
129                    info.download_url.clone(),
130                    (idx + 1) as u64,
131                    info.name.clone(),
132                    info.size,
133                    info.last_modified.unwrap_or(published_at),
134                )
135            })
136            .collect();
137
138        let version = best_version.unwrap_or_else(|| Version::new(0, 0, 0, false));
139        let release_name = if assets.len() == 1 {
140            let info = &selected_infos[0];
141            if let Some(etag) = &info.etag {
142                format!("{} [{}]", info.name, etag)
143            } else {
144                info.name.clone()
145            }
146        } else {
147            format!("Discovered {} assets", assets.len())
148        };
149        Ok(Some(Release {
150            id: 1,
151            tag: "direct".to_string(),
152            name: release_name,
153            body: "Discovered from HTTP source".to_string(),
154            is_draft: false,
155            is_prerelease: false,
156            assets,
157            version,
158            published_at,
159        }))
160    }
161
162    pub async fn get_releases(
163        &self,
164        slug: &str,
165        _per_page: Option<u32>,
166        _max_total: Option<u32>,
167    ) -> Result<Vec<Release>> {
168        Ok(vec![self.get_latest_release(slug).await?])
169    }
170}
171
#[cfg(test)]
mod tests {
    use super::WebScraperAdapter;
    use crate::providers::http::HttpClient;
    use chrono::Utc;
    use std::io::{BufRead, BufReader, Write};
    use std::net::TcpListener;
    use std::sync::mpsc;
    use std::thread;

    /// Starts a throwaway HTTP server on a background thread and returns its
    /// base URL.
    ///
    /// The server accepts at most `max_requests` connections. For each one it
    /// reads the request line and discards the headers, then writes back
    /// whatever `handler(method, path)` returns.
    fn spawn_test_server<F>(max_requests: usize, handler: F) -> String
    where
        F: Fn(&str, &str) -> String + Send + 'static,
    {
        let (addr_tx, addr_rx) = mpsc::channel();
        thread::spawn(move || {
            // Port 0 lets the OS choose a free port; report it to the caller.
            let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
            let bound = listener.local_addr().expect("resolve local addr");
            addr_tx.send(bound).expect("send test server addr");

            for connection in listener.incoming().take(max_requests) {
                let mut stream = connection.expect("accept request");
                let read_half = stream.try_clone().expect("clone stream");
                let mut reader = BufReader::new(read_half);

                let mut request_line = String::new();
                reader
                    .read_line(&mut request_line)
                    .expect("read request line");
                let mut tokens = request_line.split_whitespace();
                let method = tokens.next().unwrap_or("");
                let path = tokens.next().unwrap_or("/");

                // Skip headers up to the blank separator line or end of stream.
                let mut header = String::new();
                loop {
                    header.clear();
                    let bytes_read = reader.read_line(&mut header).expect("read request headers");
                    if bytes_read == 0 || header == "\r\n" {
                        break;
                    }
                }

                let reply = handler(method, path);
                stream.write_all(reply.as_bytes()).expect("write response");
                stream.flush().expect("flush response");
            }
        });

        let addr = addr_rx.recv().expect("receive server address");
        format!("http://{}", addr)
    }

    /// Serialises a raw HTTP response: status line, one `name: value` line per
    /// header, a blank line, then the body. Lines are terminated with CRLF.
    fn http_response(status_line: &str, headers: &[(&str, &str)], body: &str) -> String {
        let mut response = String::new();
        response.push_str(status_line);
        response.push_str("\r\n");
        for (name, value) in headers {
            response.push_str(name);
            response.push_str(": ");
            response.push_str(value);
            response.push_str("\r\n");
        }
        response.push_str("\r\n");
        response.push_str(body);
        response
    }

    #[test]
    fn parse_version_from_filename_extracts_semver_triplet() {
        let parsed = WebScraperAdapter::parse_version_from_filename("tool-v1.4.9-linux.tar.gz")
            .expect("parsed version");
        assert_eq!(parsed.major, 1);
        assert_eq!(parsed.minor, 4);
        assert_eq!(parsed.patch, 9);
    }

    #[tokio::test]
    async fn get_latest_release_selects_assets_for_latest_detected_version() {
        // Listing page with an older release next to the newest one.
        let page = r#"
                <html><body>
                    <a href="/tool-v1.9.0-linux.tar.gz">old</a>
                    <a href="/tool-v1.10.0-linux.tar.gz">new</a>
                    <a href="/tool-v1.10.0-linux.sha256">checksum</a>
                </body></html>
            "#
        .to_string();
        let page_len = page.len().to_string();
        let served_page = page.clone();
        let server = spawn_test_server(1, move |method, _| {
            assert_eq!(method, "GET");
            let headers = [
                ("Connection", "close"),
                ("Content-Type", "text/html"),
                ("Content-Length", page_len.as_str()),
            ];
            http_response("HTTP/1.1 200 OK", &headers, &served_page)
        });

        let client = HttpClient::new().expect("http client");
        let adapter = WebScraperAdapter::new(client);
        let release = adapter
            .get_latest_release(&server)
            .await
            .expect("latest release");

        assert_eq!(release.version.major, 1);
        assert_eq!(release.version.minor, 10);
        assert_eq!(release.version.patch, 0);
        assert_eq!(release.assets.len(), 1);
        assert!(release.assets[0].name.contains("1.10.0"));
    }

    #[tokio::test]
    async fn conditional_latest_release_returns_none_on_not_modified() {
        let server = spawn_test_server(1, move |method, _| {
            assert_eq!(method, "GET");
            http_response("HTTP/1.1 304 Not Modified", &[("Connection", "close")], "")
        });

        let client = HttpClient::new().expect("http client");
        let adapter = WebScraperAdapter::new(client);
        let outcome = adapter
            .get_latest_release_if_modified_since(&server, Some(Utc::now()))
            .await
            .expect("conditional release");
        assert!(outcome.is_none());
    }
}