upstream_rs/providers/http/
webscraper_adapter.rs1use anyhow::{Result, anyhow, bail};
2use chrono::{DateTime, Datelike, Timelike, Utc};
3use std::path::Path;
4
5use crate::models::common::Version;
6use crate::models::provider::{Asset, Release};
7use crate::providers::http::http_client::{ConditionalDiscoveryResult, HttpClient};
8
9#[derive(Debug, Clone)]
10pub struct WebScraperAdapter {
11 client: HttpClient,
12}
13
14impl WebScraperAdapter {
15 fn parse_version_from_filename(filename: &str) -> Option<Version> {
16 Version::from_filename(filename).ok()
17 }
18
19 fn version_from_last_modified(dt: DateTime<Utc>) -> Version {
20 let major = dt.year_ce().1;
22 let minor = dt.ordinal();
23 let patch = dt.num_seconds_from_midnight();
24 Version::new(major, minor, patch, false)
25 }
26
27 pub fn new(client: HttpClient) -> Self {
28 Self { client }
29 }
30
31 pub async fn download_asset<F>(
32 &self,
33 asset: &Asset,
34 destination_path: &Path,
35 dl_callback: &mut Option<F>,
36 ) -> Result<()>
37 where
38 F: FnMut(u64, u64),
39 {
40 self.client
41 .download_file(&asset.download_url, destination_path, dl_callback)
42 .await
43 }
44
45 pub async fn get_release_by_tag(&self, _slug: &str, _tag: &str) -> Result<Release> {
46 bail!("HTTP provider does not support tagged releases")
47 }
48
49 pub async fn get_latest_release(&self, slug: &str) -> Result<Release> {
50 self.get_latest_release_if_modified_since(slug, None)
51 .await?
52 .ok_or_else(|| anyhow!("Unexpected not-modified response for scraper provider"))
53 }
54
55 pub async fn get_latest_release_if_modified_since(
56 &self,
57 slug: &str,
58 last_upgraded: Option<DateTime<Utc>>,
59 ) -> Result<Option<Release>> {
60 let discovery = self
61 .client
62 .discover_assets_if_modified_since(slug, last_upgraded)
63 .await?;
64 let mut infos = match discovery {
65 ConditionalDiscoveryResult::NotModified => return Ok(None),
66 ConditionalDiscoveryResult::Assets(infos) => infos,
67 };
68
69 let mut best_version: Option<Version> = None;
70 for info in &infos {
71 if let Some(version) = Self::parse_version_from_filename(&info.name) {
72 match &best_version {
73 Some(prev) if prev.cmp(&version).is_ge() => {}
74 _ => best_version = Some(version),
75 }
76 }
77 }
78
79 if best_version.is_none() {
80 let hydrate_limit = infos.len().min(24);
81 for info in infos.iter_mut().take(hydrate_limit) {
82 let url = info.download_url.clone();
83 if let Ok(probed) = self.client.probe_asset(&url).await {
84 info.size = probed.size;
85 info.last_modified = probed.last_modified;
86 info.etag = probed.etag;
87 }
88 }
89 }
90
91 if best_version.is_none() {
92 for info in &infos {
93 if let Some(last_modified) = info.last_modified {
94 let version = Self::version_from_last_modified(last_modified);
95 match &best_version {
96 Some(prev) if prev.cmp(&version).is_ge() => {}
97 _ => best_version = Some(version),
98 }
99 }
100 }
101 }
102
103 let selected_infos = if let Some(target_version) = &best_version {
104 let filtered: Vec<_> = infos
105 .iter()
106 .filter(|info| {
107 Self::parse_version_from_filename(&info.name)
108 .map(|v| v.cmp(target_version).is_eq())
109 .unwrap_or(false)
110 })
111 .cloned()
112 .collect();
113 if filtered.is_empty() { infos } else { filtered }
114 } else {
115 infos
116 };
117
118 let published_at = selected_infos
119 .iter()
120 .filter_map(|i| i.last_modified)
121 .max()
122 .unwrap_or_else(Utc::now);
123
124 let assets: Vec<Asset> = selected_infos
125 .iter()
126 .enumerate()
127 .map(|(idx, info)| {
128 Asset::new(
129 info.download_url.clone(),
130 (idx + 1) as u64,
131 info.name.clone(),
132 info.size,
133 info.last_modified.unwrap_or(published_at),
134 )
135 })
136 .collect();
137
138 let version = best_version.unwrap_or_else(|| Version::new(0, 0, 0, false));
139 let release_name = if assets.len() == 1 {
140 let info = &selected_infos[0];
141 if let Some(etag) = &info.etag {
142 format!("{} [{}]", info.name, etag)
143 } else {
144 info.name.clone()
145 }
146 } else {
147 format!("Discovered {} assets", assets.len())
148 };
149 Ok(Some(Release {
150 id: 1,
151 tag: "direct".to_string(),
152 name: release_name,
153 body: "Discovered from HTTP source".to_string(),
154 is_draft: false,
155 is_prerelease: false,
156 assets,
157 version,
158 published_at,
159 }))
160 }
161
162 pub async fn get_releases(
163 &self,
164 slug: &str,
165 _per_page: Option<u32>,
166 _max_total: Option<u32>,
167 ) -> Result<Vec<Release>> {
168 Ok(vec![self.get_latest_release(slug).await?])
169 }
170}
171
172#[cfg(test)]
173mod tests {
174 use super::WebScraperAdapter;
175 use crate::providers::http::HttpClient;
176 use chrono::Utc;
177 use std::io::{BufRead, BufReader, Write};
178 use std::net::TcpListener;
179 use std::sync::mpsc;
180 use std::thread;
181
/// Starts a minimal HTTP server on a loopback port and returns its base URL
/// (`http://<addr>`).
///
/// A background thread accepts exactly `max_requests` connections, one after
/// another. For each connection it reads the request line and skips the
/// header lines, calls `handler(method, path)` and writes the returned string
/// back verbatim as the response. A missing method is passed as `""` and a
/// missing path as `"/"`. Request bodies are not read.
///
/// Any I/O failure panics (in the server thread, or in the caller if the
/// bound address never arrives).
fn spawn_test_server<F>(max_requests: usize, handler: F) -> String
where
    F: Fn(&str, &str) -> String + Send + 'static,
{
    let (addr_tx, addr_rx) = mpsc::channel();

    thread::spawn(move || {
        // Port 0 asks the OS for any free port; the actual address is reported
        // back to the spawning thread through the channel.
        let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
        let bound = listener.local_addr().expect("resolve local addr");
        addr_tx.send(bound).expect("send test server addr");

        for incoming in listener.incoming().take(max_requests) {
            let mut stream = incoming.expect("accept request");
            let mut reader = BufReader::new(stream.try_clone().expect("clone stream"));

            let mut request_line = String::new();
            reader
                .read_line(&mut request_line)
                .expect("read request line");
            let mut tokens = request_line.split_whitespace();
            let method = tokens.next().unwrap_or("");
            let path = tokens.next().unwrap_or("/");

            // Skip the headers: stop at the blank line that ends them, or at
            // end of stream.
            let mut header = String::new();
            loop {
                header.clear();
                let bytes_read = reader
                    .read_line(&mut header)
                    .expect("read request headers");
                if bytes_read == 0 || header == "\r\n" {
                    break;
                }
            }

            let response = handler(method, path);
            stream
                .write_all(response.as_bytes())
                .expect("write response");
            stream.flush().expect("flush response");
        }
    });

    let addr = addr_rx.recv().expect("receive server address");
    format!("http://{addr}")
}
225
/// Assembles a raw HTTP response message.
///
/// The result is `status_line`, then one `name: value` line per entry of
/// `headers` (in the given order), then an empty line, then `body`. Every
/// line is terminated with CRLF. Nothing is validated or escaped, and no
/// header (such as `Content-Length`) is added automatically.
fn http_response(status_line: &str, headers: &[(&str, &str)], body: &str) -> String {
    let mut out = String::new();
    out.push_str(status_line);
    out.push_str("\r\n");
    // Append the pieces directly rather than via `format!`, which would
    // allocate a temporary `String` for every header.
    for (name, value) in headers {
        out.push_str(name);
        out.push_str(": ");
        out.push_str(value);
        out.push_str("\r\n");
    }
    // Blank line separating the header section from the body.
    out.push_str("\r\n");
    out.push_str(body);
    out
}
235
// A `major.minor.patch` triplet embedded in an archive name must be parsed
// into the corresponding version components.
#[test]
fn parse_version_from_filename_extracts_semver_triplet() {
    let parsed = WebScraperAdapter::parse_version_from_filename("tool-v1.4.9-linux.tar.gz");
    let version = parsed.expect("parsed version");
    assert_eq!(
        (version.major, version.minor, version.patch),
        (1, 4, 9)
    );
}
244
245 #[tokio::test]
246 async fn get_latest_release_selects_assets_for_latest_detected_version() {
247 let html = r#"
248 <html><body>
249 <a href="/tool-v1.9.0-linux.tar.gz">old</a>
250 <a href="/tool-v1.10.0-linux.tar.gz">new</a>
251 <a href="/tool-v1.10.0-linux.sha256">checksum</a>
252 </body></html>
253 "#
254 .to_string();
255 let html_len = html.len().to_string();
256 let html_for_server = html.clone();
257 let server = spawn_test_server(1, move |method, _| {
258 assert_eq!(method, "GET");
259 http_response(
260 "HTTP/1.1 200 OK",
261 &[
262 ("Connection", "close"),
263 ("Content-Type", "text/html"),
264 ("Content-Length", &html_len),
265 ],
266 &html_for_server,
267 )
268 });
269
270 let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
271 let release = adapter
272 .get_latest_release(&server)
273 .await
274 .expect("latest release");
275
276 assert_eq!(release.version.major, 1);
277 assert_eq!(release.version.minor, 10);
278 assert_eq!(release.version.patch, 0);
279 assert_eq!(release.assets.len(), 1);
280 assert!(release.assets[0].name.contains("1.10.0"));
281 }
282
283 #[tokio::test]
284 async fn conditional_latest_release_returns_none_on_not_modified() {
285 let server = spawn_test_server(1, move |method, _| {
286 assert_eq!(method, "GET");
287 http_response("HTTP/1.1 304 Not Modified", &[("Connection", "close")], "")
288 });
289 let adapter = WebScraperAdapter::new(HttpClient::new().expect("http client"));
290 let release = adapter
291 .get_latest_release_if_modified_since(&server, Some(Utc::now()))
292 .await
293 .expect("conditional release");
294 assert!(release.is_none());
295 }
296}