1use crate::{Error, Result};
2use base64::{Engine, engine::general_purpose::STANDARD};
3use reqwest::header::{CONTENT_LENGTH, ETAG, IF_MODIFIED_SINCE, IF_NONE_MATCH, LAST_MODIFIED};
4use reqwest::{Client, StatusCode};
5use sha2::{Digest, Sha256};
6use std::time::Duration;
7use tracing::{debug, info};
8
9pub struct Fetcher {
11 client: Client,
12}
13
14impl Fetcher {
15 pub fn new() -> Result<Self> {
17 Self::with_timeout(Duration::from_secs(30))
18 }
19
20 pub fn with_timeout(timeout: Duration) -> Result<Self> {
22 let client = Client::builder()
23 .timeout(timeout)
24 .user_agent(concat!("outfitter-blz/", env!("CARGO_PKG_VERSION")))
25 .gzip(true)
26 .brotli(true)
27 .build()
28 .map_err(Error::Network)?;
29 Ok(Self { client })
30 }
31
32 pub async fn fetch_with_cache(
34 &self,
35 url: &str,
36 etag: Option<&str>,
37 last_modified: Option<&str>,
38 ) -> Result<FetchResult> {
39 let mut request = self.client.get(url);
40
41 if let Some(tag) = etag {
42 debug!("Setting If-None-Match: {}", tag);
43 request = request.header(IF_NONE_MATCH, tag);
44 }
45
46 if let Some(lm) = last_modified {
47 debug!("Setting If-Modified-Since: {}", lm);
48 request = request.header(IF_MODIFIED_SINCE, lm);
49 }
50
51 let response = request.send().await?;
52 let status = response.status();
53
54 if status == StatusCode::NOT_MODIFIED {
55 info!("Resource not modified (304) for {}", url);
56
57 let etag = response
59 .headers()
60 .get(ETAG)
61 .and_then(|v| v.to_str().ok())
62 .map(std::string::ToString::to_string);
63
64 let last_modified = response
65 .headers()
66 .get(LAST_MODIFIED)
67 .and_then(|v| v.to_str().ok())
68 .map(std::string::ToString::to_string);
69
70 return Ok(FetchResult::NotModified {
71 etag,
72 last_modified,
73 });
74 }
75
76 if !status.is_success() {
77 if status == StatusCode::NOT_FOUND {
79 return Err(Error::NotFound(format!(
80 "Resource not found at '{url}'. Check the URL or try 'blz lookup' to find available sources"
81 )));
82 }
83
84 match response.error_for_status() {
86 Ok(_) => unreachable!("Status should be an error"),
87 Err(err) => return Err(Error::Network(err)),
88 }
89 }
90
91 let new_etag = response
92 .headers()
93 .get(ETAG)
94 .and_then(|v| v.to_str().ok())
95 .map(std::string::ToString::to_string);
96
97 let new_last_modified = response
98 .headers()
99 .get(LAST_MODIFIED)
100 .and_then(|v| v.to_str().ok())
101 .map(std::string::ToString::to_string);
102
103 let content = response.text().await?;
104 let sha256 = calculate_sha256(&content);
105
106 info!("Fetched {} bytes from {}", content.len(), url);
107
108 Ok(FetchResult::Modified {
109 content,
110 etag: new_etag,
111 last_modified: new_last_modified,
112 sha256,
113 })
114 }
115
116 pub async fn fetch(&self, url: &str) -> Result<(String, String)> {
118 let response = self.client.get(url).send().await?;
119 let status = response.status();
120
121 if !status.is_success() {
122 if status == StatusCode::NOT_FOUND {
124 return Err(Error::NotFound(format!(
125 "Resource not found at '{url}'. Check the URL or try 'blz lookup' to find available sources"
126 )));
127 }
128
129 match response.error_for_status() {
131 Ok(_) => unreachable!("Status should be an error"),
132 Err(err) => return Err(Error::Network(err)),
133 }
134 }
135
136 let content = response.text().await?;
137 let sha256 = calculate_sha256(&content);
138
139 Ok((content, sha256))
140 }
141
142 pub async fn head_metadata(&self, url: &str) -> Result<HeadInfo> {
144 let response = self.client.head(url).send().await?;
145 let status = response.status();
146
147 let content_length = response
148 .headers()
149 .get(CONTENT_LENGTH)
150 .and_then(|v| v.to_str().ok())
151 .and_then(|s| s.parse::<u64>().ok());
152
153 let etag = response
154 .headers()
155 .get(ETAG)
156 .and_then(|v| v.to_str().ok())
157 .map(std::string::ToString::to_string);
158
159 let last_modified = response
160 .headers()
161 .get(LAST_MODIFIED)
162 .and_then(|v| v.to_str().ok())
163 .map(std::string::ToString::to_string);
164
165 Ok(HeadInfo {
166 status: status.as_u16(),
167 content_length,
168 etag,
169 last_modified,
170 })
171 }
172}
173
/// Metadata gathered from the response to a `HEAD` request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HeadInfo {
    /// Numeric HTTP status code of the response.
    pub status: u16,
    /// Parsed `Content-Length` header, if present and a valid unsigned integer.
    pub content_length: Option<u64>,
    /// `ETag` header value, if present.
    pub etag: Option<String>,
    /// `Last-Modified` header value, if present.
    pub last_modified: Option<String>,
}
186
/// Outcome of a conditional fetch.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FetchResult {
    /// The server answered `304 Not Modified`; no body was downloaded.
    NotModified {
        /// `ETag` header of the 304 response, if present.
        etag: Option<String>,
        /// `Last-Modified` header of the 304 response, if present.
        last_modified: Option<String>,
    },
    /// The server returned content.
    Modified {
        /// Response body decoded as text.
        content: String,
        /// `ETag` header of the response, if present.
        etag: Option<String>,
        /// `Last-Modified` header of the response, if present.
        last_modified: Option<String>,
        /// Base64-encoded SHA-256 digest of `content`.
        sha256: String,
    },
}
208
209fn calculate_sha256(content: &str) -> String {
210 let mut hasher = Sha256::new();
211 hasher.update(content.as_bytes());
212 let result = hasher.finalize();
213 STANDARD.encode(result)
214}
215
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::panic,
    clippy::disallowed_macros,
    clippy::match_wildcard_for_single_variants
)]
mod tests {
    use super::*;
    use std::time::Duration;
    use wiremock::{
        Mock, MockServer, ResponseTemplate,
        matchers::{header, method, path},
    };

    /// Building a fetcher with default settings must succeed.
    #[tokio::test]
    async fn test_fetcher_creation() {
        assert!(Fetcher::new().is_ok(), "Fetcher creation should succeed");
    }

    /// A matching `If-None-Match` that yields 304 is reported as `NotModified`.
    #[tokio::test]
    async fn test_fetch_with_etag_not_modified() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/llms.txt"))
            .and(header("If-None-Match", "\"test-etag\""))
            .respond_with(ResponseTemplate::new(304))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/llms.txt", mock_server.uri());

        let result = fetcher
            .fetch_with_cache(&url, Some("\"test-etag\""), None)
            .await?;

        assert!(
            matches!(result, FetchResult::NotModified { .. }),
            "Expected NotModified result for matching ETag"
        );

        Ok(())
    }

    /// A 200 response yields `Modified` carrying body, validators and hash.
    #[tokio::test]
    async fn test_fetch_with_etag_modified() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        let content = "# Test Content\n\nThis is test content.";

        Mock::given(method("GET"))
            .and(path("/llms.txt"))
            .and(header("If-None-Match", "\"old-etag\""))
            .respond_with(
                ResponseTemplate::new(200)
                    .set_body_string(content)
                    .insert_header("etag", "\"new-etag\"")
                    .insert_header("last-modified", "Wed, 21 Oct 2015 07:28:00 GMT"),
            )
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/llms.txt", mock_server.uri());

        let result = fetcher
            .fetch_with_cache(&url, Some("\"old-etag\""), None)
            .await?;

        match result {
            FetchResult::Modified {
                content: returned_content,
                etag,
                last_modified,
                sha256,
            } => {
                assert_eq!(returned_content, content);
                assert_eq!(etag, Some("\"new-etag\"".to_string()));
                assert_eq!(
                    last_modified,
                    Some("Wed, 21 Oct 2015 07:28:00 GMT".to_string())
                );
                assert_eq!(sha256, calculate_sha256(content));
            },
            _ => panic!("Expected Modified result for non-matching ETag"),
        }

        Ok(())
    }

    // NOTE(review): this function is not registered as a test (no
    // `#[tokio::test]`). Presumably it was disabled because the exact-match
    // header matcher does not cope with comma-containing values such as HTTP
    // dates — confirm against the wiremock version in use before re-enabling.
    #[allow(dead_code)]
    async fn test_fetch_with_last_modified() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/llms.txt"))
            .and(header("If-Modified-Since", "Wed, 21 Oct 2015 07:28:00 GMT"))
            .respond_with(ResponseTemplate::new(304))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/llms.txt", mock_server.uri());

        let result = fetcher
            .fetch_with_cache(&url, None, Some("Wed, 21 Oct 2015 07:28:00 GMT"))
            .await?;

        assert!(
            matches!(result, FetchResult::NotModified { .. }),
            "Expected NotModified result for matching Last-Modified"
        );

        Ok(())
    }

    /// `fetch_with_cache` maps 404 to `Error::NotFound` with a helpful message.
    #[tokio::test]
    async fn test_fetch_404_error() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/nonexistent.txt"))
            .respond_with(ResponseTemplate::new(404))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/nonexistent.txt", mock_server.uri());

        match fetcher.fetch_with_cache(&url, None, None).await {
            Err(Error::NotFound(msg)) => {
                assert!(msg.contains("not found"));
                assert!(msg.contains("blz lookup"));
            },
            Err(e) => panic!("Expected NotFound error, got: {e}"),
            Ok(_) => panic!("Expected error for 404 response"),
        }

        Ok(())
    }

    /// The plain `fetch` path maps 404 to `Error::NotFound` as well.
    #[tokio::test]
    async fn test_fetch_simple_404_error() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/nonexistent.txt"))
            .respond_with(ResponseTemplate::new(404))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/nonexistent.txt", mock_server.uri());

        match fetcher.fetch(&url).await {
            Err(Error::NotFound(msg)) => {
                assert!(msg.contains("not found"));
                assert!(msg.contains("blz lookup"));
            },
            Err(e) => panic!("Expected NotFound error, got: {e}"),
            Ok(_) => panic!("Expected error for 404 response"),
        }

        Ok(())
    }

    /// Server errors surface as `Error::Network`.
    #[tokio::test]
    async fn test_fetch_500_error() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/error.txt"))
            .respond_with(ResponseTemplate::new(500))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/error.txt", mock_server.uri());

        match fetcher.fetch_with_cache(&url, None, None).await {
            Err(Error::Network(_)) => {},
            Err(e) => panic!("Expected Network error, got: {e}"),
            Ok(_) => panic!("Expected error for 500 response"),
        }

        Ok(())
    }

    /// A response slower than the configured timeout fails before it arrives.
    #[tokio::test]
    async fn test_fetch_timeout() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/slow.txt"))
            .respond_with(
                ResponseTemplate::new(200)
                    .set_body_string("slow content")
                    .set_delay(Duration::from_millis(500)),
            )
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::with_timeout(Duration::from_millis(200))?;
        let url = format!("{}/slow.txt", mock_server.uri());

        let start_time = std::time::Instant::now();
        let result = fetcher.fetch_with_cache(&url, None, None).await;
        let elapsed = start_time.elapsed();

        assert!(result.is_err(), "Slow request should timeout");
        assert!(
            elapsed < Duration::from_millis(500),
            "Should timeout before server's 500ms delay"
        );

        Ok(())
    }

    /// `fetch` returns the body and a hash that matches `calculate_sha256`.
    #[tokio::test]
    async fn test_fetch_simple_without_cache() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        let content = "# Simple Content\n\nThis is simple test content.";

        Mock::given(method("GET"))
            .and(path("/simple.txt"))
            .respond_with(ResponseTemplate::new(200).set_body_string(content))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/simple.txt", mock_server.uri());

        let (returned_content, sha256) = fetcher.fetch(&url).await?;

        assert_eq!(returned_content, content);
        assert!(!sha256.is_empty(), "SHA256 should be computed");
        assert_eq!(sha256, calculate_sha256(content));

        Ok(())
    }

    // NOTE(review): not registered as a test (no `#[tokio::test]`); presumably
    // disabled for the same reason as `test_fetch_with_last_modified` — the
    // `If-Modified-Since` value contains a comma. Confirm before re-enabling.
    #[allow(dead_code)]
    async fn test_fetch_with_both_etag_and_last_modified() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/both.txt"))
            .and(header("If-None-Match", "\"test-etag\""))
            .and(header("If-Modified-Since", "Wed, 21 Oct 2015 07:28:00 GMT"))
            .respond_with(ResponseTemplate::new(304))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/both.txt", mock_server.uri());

        let result = fetcher
            .fetch_with_cache(
                &url,
                Some("\"test-etag\""),
                Some("Wed, 21 Oct 2015 07:28:00 GMT"),
            )
            .await?;

        assert!(
            matches!(result, FetchResult::NotModified { .. }),
            "Expected NotModified result for matching cache headers"
        );

        Ok(())
    }

    /// The hash is a 44-character base64 string with known reference values.
    #[test]
    fn test_sha256_calculation() {
        let sha256 = calculate_sha256("Hello, World!");

        // 32 digest bytes encode to 44 base64 characters including padding.
        assert_eq!(sha256.len(), 44);
        assert_eq!(sha256, "3/1gIbsr1bCvZ2KQgJ7DpTGR3YHH9wpLKGiKNiGCmG8=");

        let empty_sha256 = calculate_sha256("");
        assert_eq!(empty_sha256, "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=");
    }

    /// Malformed or unsupported URLs are rejected with an error, not a panic.
    #[tokio::test]
    async fn test_invalid_urls() -> anyhow::Result<()> {
        let fetcher = Fetcher::new()?;

        let invalid_urls = [
            "not-a-url",
            "ftp://invalid-protocol.com/llms.txt",
            "",
            "https://",
        ];

        for invalid_url in invalid_urls {
            let result = fetcher.fetch_with_cache(invalid_url, None, None).await;
            assert!(result.is_err(), "Invalid URL '{invalid_url}' should fail");
        }

        Ok(())
    }

    /// Ten independent fetchers hitting the same endpoint all succeed.
    #[tokio::test]
    async fn test_concurrent_requests() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/concurrent.txt"))
            .respond_with(ResponseTemplate::new(200).set_body_string("concurrent content"))
            .mount(&mock_server)
            .await;

        let url = format!("{}/concurrent.txt", mock_server.uri());

        let mut handles = Vec::new();

        for i in 0..10 {
            let fetcher = Fetcher::new()?;
            let task_url = url.clone();

            handles.push(tokio::spawn(async move {
                let result = fetcher.fetch(&task_url).await;
                (i, result)
            }));
        }

        let results = futures::future::join_all(handles).await;

        for result in results {
            let (index, fetch_result) = result.expect("Task should complete");

            match fetch_result {
                Ok((content, sha256)) => {
                    assert_eq!(content, "concurrent content");
                    assert!(!sha256.is_empty());
                },
                Err(e) => panic!("Request {index} should succeed: {e}"),
            }
        }

        Ok(())
    }
}