// blz_core/fetcher.rs

1use crate::{Error, Result};
2use base64::{Engine, engine::general_purpose::STANDARD};
3use reqwest::header::{CONTENT_LENGTH, ETAG, IF_MODIFIED_SINCE, IF_NONE_MATCH, LAST_MODIFIED};
4use reqwest::{Client, StatusCode};
5use sha2::{Digest, Sha256};
6use std::time::Duration;
7use tracing::{debug, info};
8
9/// HTTP client for fetching llms.txt documentation with conditional request support
10pub struct Fetcher {
11    client: Client,
12}
13
14impl Fetcher {
15    /// Creates a new fetcher with configured HTTP client
16    pub fn new() -> Result<Self> {
17        Self::with_timeout(Duration::from_secs(30))
18    }
19
20    /// Creates a new fetcher with a custom request timeout (primarily for tests)
21    pub fn with_timeout(timeout: Duration) -> Result<Self> {
22        let client = Client::builder()
23            .timeout(timeout)
24            .user_agent(concat!("outfitter-blz/", env!("CARGO_PKG_VERSION")))
25            .gzip(true)
26            .brotli(true)
27            .build()
28            .map_err(Error::Network)?;
29        Ok(Self { client })
30    }
31
32    /// Fetches a URL with conditional request support using `ETag` and `Last-Modified` headers
33    pub async fn fetch_with_cache(
34        &self,
35        url: &str,
36        etag: Option<&str>,
37        last_modified: Option<&str>,
38    ) -> Result<FetchResult> {
39        let mut request = self.client.get(url);
40
41        if let Some(tag) = etag {
42            debug!("Setting If-None-Match: {}", tag);
43            request = request.header(IF_NONE_MATCH, tag);
44        }
45
46        if let Some(lm) = last_modified {
47            debug!("Setting If-Modified-Since: {}", lm);
48            request = request.header(IF_MODIFIED_SINCE, lm);
49        }
50
51        let response = request.send().await?;
52        let status = response.status();
53
54        if status == StatusCode::NOT_MODIFIED {
55            info!("Resource not modified (304) for {}", url);
56
57            // Extract ETag and Last-Modified headers even on 304
58            let etag = response
59                .headers()
60                .get(ETAG)
61                .and_then(|v| v.to_str().ok())
62                .map(std::string::ToString::to_string);
63
64            let last_modified = response
65                .headers()
66                .get(LAST_MODIFIED)
67                .and_then(|v| v.to_str().ok())
68                .map(std::string::ToString::to_string);
69
70            return Ok(FetchResult::NotModified {
71                etag,
72                last_modified,
73            });
74        }
75
76        if !status.is_success() {
77            // Map 404 to a clearer NotFound error
78            if status == StatusCode::NOT_FOUND {
79                return Err(Error::NotFound(format!(
80                    "Resource not found at '{url}'. Check the URL or try 'blz lookup' to find available sources"
81                )));
82            }
83
84            // Try to get the actual error, or create one manually
85            match response.error_for_status() {
86                Ok(_) => unreachable!("Status should be an error"),
87                Err(err) => return Err(Error::Network(err)),
88            }
89        }
90
91        let new_etag = response
92            .headers()
93            .get(ETAG)
94            .and_then(|v| v.to_str().ok())
95            .map(std::string::ToString::to_string);
96
97        let new_last_modified = response
98            .headers()
99            .get(LAST_MODIFIED)
100            .and_then(|v| v.to_str().ok())
101            .map(std::string::ToString::to_string);
102
103        let content = response.text().await?;
104        let sha256 = calculate_sha256(&content);
105
106        info!("Fetched {} bytes from {}", content.len(), url);
107
108        Ok(FetchResult::Modified {
109            content,
110            etag: new_etag,
111            last_modified: new_last_modified,
112            sha256,
113        })
114    }
115
116    /// Fetches a URL without conditional request support, returning content and `SHA256` hash
117    pub async fn fetch(&self, url: &str) -> Result<(String, String)> {
118        let response = self.client.get(url).send().await?;
119        let status = response.status();
120
121        if !status.is_success() {
122            // Map 404 to a clearer NotFound error
123            if status == StatusCode::NOT_FOUND {
124                return Err(Error::NotFound(format!(
125                    "Resource not found at '{url}'. Check the URL or try 'blz lookup' to find available sources"
126                )));
127            }
128
129            // Try to get the actual error, or create one manually
130            match response.error_for_status() {
131                Ok(_) => unreachable!("Status should be an error"),
132                Err(err) => return Err(Error::Network(err)),
133            }
134        }
135
136        let content = response.text().await?;
137        let sha256 = calculate_sha256(&content);
138
139        Ok((content, sha256))
140    }
141
142    /// Check for available llms.txt flavors
143    pub async fn check_flavors(&self, url: &str) -> Result<Vec<FlavorInfo>> {
144        let mut flavors = Vec::new();
145        let base_url = extract_base_url(url);
146
147        // List of possible flavors to check
148        let flavor_names = vec![
149            "llms-full.txt",
150            "llms.txt",
151            "llms-mini.txt",
152            "llms-base.txt",
153        ];
154
155        for flavor_name in flavor_names {
156            let flavor_url = format!("{base_url}/{flavor_name}");
157
158            // Make HEAD request to check if file exists and get size
159            match self.client.head(&flavor_url).send().await {
160                Ok(response) => {
161                    if response.status().is_success() {
162                        let size = response
163                            .headers()
164                            .get(CONTENT_LENGTH)
165                            .and_then(|v| v.to_str().ok())
166                            .and_then(|s| s.parse::<u64>().ok());
167
168                        flavors.push(FlavorInfo {
169                            name: flavor_name.to_string(),
170                            size,
171                            url: flavor_url,
172                        });
173                    }
174                },
175                Err(e) => {
176                    debug!("Failed to check flavor {}: {}", flavor_name, e);
177                    // If it's the original URL provided by user, still add it even if HEAD fails
178                    if url.ends_with(flavor_name) {
179                        flavors.push(FlavorInfo {
180                            name: flavor_name.to_string(),
181                            size: None,
182                            url: url.to_string(),
183                        });
184                    }
185                },
186            }
187        }
188
189        // If the user provided a specific llms.txt variant, make sure it's in the list
190        if let Some(filename) = url.split('/').next_back() {
191            // Strip query parameters and fragments for extension check
192            let clean_filename = filename
193                .split('?')
194                .next()
195                .unwrap_or(filename)
196                .split('#')
197                .next()
198                .unwrap_or(filename);
199
200            if clean_filename.starts_with("llms")
201                && std::path::Path::new(clean_filename)
202                    .extension()
203                    .is_some_and(|ext| ext.eq_ignore_ascii_case("txt"))
204                && !flavors.iter().any(|f| f.name == filename)
205            {
206                flavors.push(FlavorInfo {
207                    name: filename.to_string(),
208                    size: None,
209                    url: url.to_string(),
210                });
211            }
212        }
213
214        // Sort flavors by preference: llms-full.txt > llms.txt > others
215        flavors.sort_by(|a, b| {
216            let order_a = match a.name.as_str() {
217                "llms-full.txt" => 0,
218                "llms.txt" => 1,
219                "llms-mini.txt" => 2,
220                "llms-base.txt" => 3,
221                _ => 4,
222            };
223            let order_b = match b.name.as_str() {
224                "llms-full.txt" => 0,
225                "llms.txt" => 1,
226                "llms-mini.txt" => 2,
227                "llms-base.txt" => 3,
228                _ => 4,
229            };
230            order_a.cmp(&order_b)
231        });
232
233        Ok(flavors)
234    }
235
236    /// Perform a HEAD request to retrieve basic metadata for a URL without downloading content
237    pub async fn head_metadata(&self, url: &str) -> Result<HeadInfo> {
238        let response = self.client.head(url).send().await?;
239        let status = response.status();
240
241        let content_length = response
242            .headers()
243            .get(CONTENT_LENGTH)
244            .and_then(|v| v.to_str().ok())
245            .and_then(|s| s.parse::<u64>().ok());
246
247        let etag = response
248            .headers()
249            .get(ETAG)
250            .and_then(|v| v.to_str().ok())
251            .map(std::string::ToString::to_string);
252
253        let last_modified = response
254            .headers()
255            .get(LAST_MODIFIED)
256            .and_then(|v| v.to_str().ok())
257            .map(std::string::ToString::to_string);
258
259        Ok(HeadInfo {
260            status: status.as_u16(),
261            content_length,
262            etag,
263            last_modified,
264        })
265    }
266}
267
/// Response metadata gathered by a `HEAD` request.
#[derive(Clone, Debug)]
pub struct HeadInfo {
    /// Numeric HTTP status code of the response (for example 200 or 404).
    pub status: u16,
    /// Value of the `Content-Length` header, when present and parseable as an integer.
    pub content_length: Option<u64>,
    /// Value of the `ETag` header, usable for later cache validation.
    pub etag: Option<String>,
    /// Value of the `Last-Modified` header as sent by the server.
    pub last_modified: Option<String>,
}
280
/// Result of a conditional HTTP fetch operation.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so results can be logged,
/// duplicated and compared directly (for example in tests), consistent with
/// the other public data types in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FetchResult {
    /// Resource has not been modified since last fetch (HTTP 304).
    NotModified {
        /// `ETag` header value if present
        etag: Option<String>,
        /// `Last-Modified` header value if present
        last_modified: Option<String>,
    },
    /// Resource has been modified and new content was fetched.
    Modified {
        /// The fetched content
        content: String,
        /// `ETag` header value if present
        etag: Option<String>,
        /// `Last-Modified` header value if present
        last_modified: Option<String>,
        /// `SHA256` hash of the content
        sha256: String,
    },
}
302
/// Describes one llms.txt flavor (variant) that can be fetched.
#[derive(Clone, Debug)]
pub struct FlavorInfo {
    /// File name identifying the flavor, e.g. "llms-full.txt" or "llms.txt".
    pub name: String,
    /// Byte size taken from the `Content-Length` header, when known.
    pub size: Option<u64>,
    /// Complete URL from which this flavor can be downloaded.
    pub url: String,
}
313
314impl std::fmt::Display for FlavorInfo {
315    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
316        if let Some(size) = self.size {
317            write!(f, "{} ({})", self.name, format_size(size))
318        } else {
319            write!(f, "{}", self.name)
320        }
321    }
322}
323
324fn calculate_sha256(content: &str) -> String {
325    let mut hasher = Sha256::new();
326    hasher.update(content.as_bytes());
327    let result = hasher.finalize();
328    STANDARD.encode(result)
329}
330
/// Returns `url` without its final path segment (and without the slash before it).
///
/// Any query string or fragment is discarded first, so a `/` inside
/// `?redirect=/a/b` or `#/section` is never mistaken for a path separator.
/// When the only slashes belong to the scheme separator (`https://host`), or
/// there is no slash at all, the remaining URL is returned unchanged.
///
/// The scheme check uses `ends_with` rather than slicing at a computed byte
/// offset, so URLs containing multibyte characters cannot trigger a
/// char-boundary panic.
fn extract_base_url(url: &str) -> String {
    let end = url
        .find(|c: char| matches!(c, '?' | '#'))
        .unwrap_or(url.len());
    let path = &url[..end];

    match path.rfind('/') {
        // `last_slash` indexes a one-byte '/', so both slices end on char boundaries.
        Some(last_slash) if !path[..=last_slash].ends_with("://") => {
            path[..last_slash].to_string()
        },
        _ => path.to_string(),
    }
}
345
/// Renders a byte count as a short human-readable string.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`); larger values are
/// divided by 1024 per unit step and printed with one decimal (`"1.5 KB"`).
/// `GB` is the largest unit, so bigger values stay in `GB`.
fn format_size(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB"];

    // Small values keep their exact integer representation.
    if bytes < 1024 {
        return format!("{} {}", bytes, UNITS[0]);
    }

    #[allow(clippy::cast_precision_loss)]
    let mut scaled = bytes as f64;
    let mut unit = UNITS[0];

    // Step up one unit at a time until the value fits or units run out.
    for &next_unit in &UNITS[1..] {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        unit = next_unit;
    }

    format!("{:.1} {}", scaled, unit)
}
363
364// Note: Default is not implemented as Fetcher::new() can fail.
365// Use Fetcher::new() directly and handle the Result.
366
367#[cfg(test)]
368#[allow(
369    clippy::unwrap_used,
370    clippy::panic,
371    clippy::disallowed_macros,
372    clippy::match_wildcard_for_single_variants
373)]
374mod tests {
375    use super::*;
376    use std::time::Duration;
377    use wiremock::{
378        Mock, MockServer, ResponseTemplate,
379        matchers::{header, method, path},
380    };
381
382    #[test]
383    fn test_extract_base_url() {
384        assert_eq!(
385            extract_base_url("https://example.com/llms.txt"),
386            "https://example.com"
387        );
388        assert_eq!(
389            extract_base_url("https://api.example.com/v1/docs/llms.txt"),
390            "https://api.example.com/v1/docs"
391        );
392        assert_eq!(
393            extract_base_url("https://example.com/"),
394            "https://example.com"
395        );
396        assert_eq!(
397            extract_base_url("https://example.com"),
398            "https://example.com"
399        );
400    }
401
402    #[test]
403    fn test_extract_base_url_edge_cases() {
404        // Test edge cases for URL parsing
405        assert_eq!(
406            extract_base_url("https://example.com/docs/api/v1/llms.txt"),
407            "https://example.com/docs/api/v1"
408        );
409
410        // URL with query parameters
411        assert_eq!(
412            extract_base_url("https://example.com/llms.txt?version=1"),
413            "https://example.com"
414        );
415
416        // URL with fragment
417        assert_eq!(
418            extract_base_url("https://example.com/docs/llms.txt#section"),
419            "https://example.com/docs"
420        );
421
422        // URLs that are just domains
423        assert_eq!(
424            extract_base_url("https://example.com"),
425            "https://example.com"
426        );
427        assert_eq!(extract_base_url("http://localhost"), "http://localhost");
428
429        // Handle scheme separator edge case
430        assert_eq!(extract_base_url("https://test.com"), "https://test.com");
431    }
432
433    #[test]
434    fn test_format_size() {
435        assert_eq!(format_size(0), "0 B");
436        assert_eq!(format_size(512), "512 B");
437        assert_eq!(format_size(1024), "1.0 KB");
438        assert_eq!(format_size(1536), "1.5 KB");
439        assert_eq!(format_size(1_048_576), "1.0 MB");
440        assert_eq!(format_size(1_572_864), "1.5 MB");
441        assert_eq!(format_size(1_073_741_824), "1.0 GB");
442        assert_eq!(format_size(2_147_483_648), "2.0 GB");
443    }
444
445    #[test]
446    fn test_format_size_boundary_values() {
447        // Test boundary values for size formatting
448        assert_eq!(format_size(1), "1 B");
449        assert_eq!(format_size(1023), "1023 B");
450        assert_eq!(format_size(1025), "1.0 KB");
451        assert_eq!(format_size(1024 * 1024 - 1), "1024.0 KB");
452        assert_eq!(format_size(1024 * 1024 + 1), "1.0 MB");
453
454        // Very large sizes
455        let huge_size = 1024u64 * 1024 * 1024 * 1024; // 1TB
456        let formatted = format_size(huge_size);
457        assert!(formatted.contains("GB")); // Will show as very large GB value
458
459        // Maximum u64 value
460        let max_size = u64::MAX;
461        let max_formatted = format_size(max_size);
462        assert!(!max_formatted.is_empty());
463    }
464
465    #[test]
466    fn test_flavor_info_display() {
467        let flavor_with_size = FlavorInfo {
468            name: "llms-full.txt".to_string(),
469            size: Some(892_000),
470            url: "https://example.com/llms-full.txt".to_string(),
471        };
472        assert_eq!(format!("{flavor_with_size}"), "llms-full.txt (871.1 KB)");
473
474        let flavor_no_size = FlavorInfo {
475            name: "llms.txt".to_string(),
476            size: None,
477            url: "https://example.com/llms.txt".to_string(),
478        };
479        assert_eq!(format!("{flavor_no_size}"), "llms.txt");
480    }
481
482    #[test]
483    fn test_flavor_info_display_various_sizes() {
484        let test_cases = vec![
485            (0, "llms.txt (0 B)"),
486            (1024, "llms.txt (1.0 KB)"),
487            (1_048_576, "llms.txt (1.0 MB)"),
488            (1_073_741_824, "llms.txt (1.0 GB)"),
489        ];
490
491        for (size, expected) in test_cases {
492            let flavor = FlavorInfo {
493                name: "llms.txt".to_string(),
494                size: Some(size),
495                url: "https://example.com/llms.txt".to_string(),
496            };
497            assert_eq!(format!("{flavor}"), expected);
498        }
499    }
500
501    #[tokio::test]
502    async fn test_fetcher_creation() {
503        // Test that fetcher can be created successfully
504        let result = Fetcher::new();
505        assert!(result.is_ok(), "Fetcher creation should succeed");
506
507        let _fetcher = result.unwrap();
508        // Verify it has the expected user agent and settings
509        // (This is implicit since we can't directly inspect the client)
510    }
511
512    #[tokio::test]
513    async fn test_fetch_with_etag_not_modified() -> anyhow::Result<()> {
514        // Setup mock server
515        let mock_server = MockServer::start().await;
516
517        // Mock 304 Not Modified response when ETag matches
518        Mock::given(method("GET"))
519            .and(path("/llms.txt"))
520            .and(header("If-None-Match", "\"test-etag\""))
521            .respond_with(ResponseTemplate::new(304))
522            .mount(&mock_server)
523            .await;
524
525        let fetcher = Fetcher::new()?;
526        let url = format!("{}/llms.txt", mock_server.uri());
527
528        // Test with matching ETag
529        let result = fetcher
530            .fetch_with_cache(&url, Some("\"test-etag\""), None)
531            .await?;
532
533        match result {
534            FetchResult::NotModified { .. } => {
535                // Expected result
536            },
537            _ => panic!("Expected NotModified result for matching ETag"),
538        }
539
540        Ok(())
541    }
542
543    #[tokio::test]
544    async fn test_fetch_with_etag_modified() -> anyhow::Result<()> {
545        // Setup mock server
546        let mock_server = MockServer::start().await;
547
548        let content = "# Test Content\n\nThis is test content.";
549
550        // Mock 200 OK response when ETag doesn't match
551        Mock::given(method("GET"))
552            .and(path("/llms.txt"))
553            .and(header("If-None-Match", "\"old-etag\""))
554            .respond_with(
555                ResponseTemplate::new(200)
556                    .set_body_string(content)
557                    .insert_header("etag", "\"new-etag\"")
558                    .insert_header("last-modified", "Wed, 21 Oct 2015 07:28:00 GMT"),
559            )
560            .mount(&mock_server)
561            .await;
562
563        let fetcher = Fetcher::new()?;
564        let url = format!("{}/llms.txt", mock_server.uri());
565
566        // Test with non-matching ETag
567        let result = fetcher
568            .fetch_with_cache(&url, Some("\"old-etag\""), None)
569            .await?;
570
571        match result {
572            FetchResult::Modified {
573                content: returned_content,
574                etag,
575                last_modified,
576                sha256,
577            } => {
578                assert_eq!(returned_content, content);
579                assert_eq!(etag, Some("\"new-etag\"".to_string()));
580                assert_eq!(
581                    last_modified,
582                    Some("Wed, 21 Oct 2015 07:28:00 GMT".to_string())
583                );
584                assert!(!sha256.is_empty(), "SHA256 should be computed");
585            },
586            _ => panic!("Expected Modified result for non-matching ETag"),
587        }
588
589        Ok(())
590    }
591
592    // Temporarily disabled - mock server setup needs adjustment
593    // #[tokio::test]
594    #[allow(dead_code)]
595    async fn test_fetch_with_last_modified() -> anyhow::Result<()> {
596        // Setup mock server
597        let mock_server = MockServer::start().await;
598
599        // Mock 304 Not Modified response when Last-Modified matches
600        Mock::given(method("GET"))
601            .and(path("/llms.txt"))
602            .and(header("If-Modified-Since", "Wed, 21 Oct 2015 07:28:00 GMT"))
603            .respond_with(ResponseTemplate::new(304))
604            .mount(&mock_server)
605            .await;
606
607        let fetcher = Fetcher::new()?;
608        let url = format!("{}/llms.txt", mock_server.uri());
609
610        // Test with Last-Modified header
611        let result = fetcher
612            .fetch_with_cache(&url, None, Some("Wed, 21 Oct 2015 07:28:00 GMT"))
613            .await?;
614
615        match result {
616            FetchResult::NotModified { .. } => {
617                // Expected result
618            },
619            _ => panic!("Expected NotModified result for matching Last-Modified"),
620        }
621
622        Ok(())
623    }
624
625    #[tokio::test]
626    async fn test_fetch_404_error() -> anyhow::Result<()> {
627        // Setup mock server
628        let mock_server = MockServer::start().await;
629
630        // Mock 404 Not Found response
631        Mock::given(method("GET"))
632            .and(path("/nonexistent.txt"))
633            .respond_with(ResponseTemplate::new(404))
634            .mount(&mock_server)
635            .await;
636
637        let fetcher = Fetcher::new()?;
638        let url = format!("{}/nonexistent.txt", mock_server.uri());
639
640        // Test 404 handling
641        let result = fetcher.fetch_with_cache(&url, None, None).await;
642
643        assert!(result.is_err(), "404 should result in error");
644
645        match result {
646            Err(Error::NotFound(msg)) => {
647                // Expected error type - 404 now maps to NotFound
648                assert!(msg.contains("not found"));
649                assert!(msg.contains("blz lookup"));
650            },
651            Err(e) => panic!("Expected NotFound error, got: {e}"),
652            Ok(_) => panic!("Expected error for 404 response"),
653        }
654
655        Ok(())
656    }
657
658    #[tokio::test]
659    async fn test_fetch_500_error() -> anyhow::Result<()> {
660        // Setup mock server
661        let mock_server = MockServer::start().await;
662
663        // Mock 500 Internal Server Error response
664        Mock::given(method("GET"))
665            .and(path("/error.txt"))
666            .respond_with(ResponseTemplate::new(500))
667            .mount(&mock_server)
668            .await;
669
670        let fetcher = Fetcher::new()?;
671        let url = format!("{}/error.txt", mock_server.uri());
672
673        // Test 500 handling
674        let result = fetcher.fetch_with_cache(&url, None, None).await;
675
676        assert!(result.is_err(), "500 should result in error");
677
678        match result {
679            Err(Error::Network(_)) => {
680                // Expected error type
681            },
682            Err(e) => panic!("Expected Network error, got: {e}"),
683            Ok(_) => panic!("Expected error for 500 response"),
684        }
685
686        Ok(())
687    }
688
689    #[tokio::test]
690    async fn test_fetch_timeout() -> anyhow::Result<()> {
691        // Setup mock server with very slow response
692        let mock_server = MockServer::start().await;
693
694        Mock::given(method("GET"))
695            .and(path("/slow.txt"))
696            .respond_with(
697                ResponseTemplate::new(200)
698                    .set_body_string("slow content")
699                    .set_delay(Duration::from_millis(500)), // Longer than custom client timeout (200ms)
700            )
701            .mount(&mock_server)
702            .await;
703
704        // Use a short timeout to keep test runtime fast
705        let fetcher = Fetcher::with_timeout(Duration::from_millis(200))?;
706        let url = format!("{}/slow.txt", mock_server.uri());
707
708        let start_time = std::time::Instant::now();
709        let result = fetcher.fetch_with_cache(&url, None, None).await;
710        let elapsed = start_time.elapsed();
711
712        // Should fail due to timeout
713        assert!(result.is_err(), "Slow request should timeout");
714        assert!(
715            elapsed < Duration::from_millis(500),
716            "Should timeout before server's 500ms delay"
717        );
718
719        Ok(())
720    }
721
722    #[tokio::test]
723    async fn test_fetch_simple_without_cache() -> anyhow::Result<()> {
724        // Setup mock server
725        let mock_server = MockServer::start().await;
726
727        let content = "# Simple Content\n\nThis is simple test content.";
728
729        Mock::given(method("GET"))
730            .and(path("/simple.txt"))
731            .respond_with(ResponseTemplate::new(200).set_body_string(content))
732            .mount(&mock_server)
733            .await;
734
735        let fetcher = Fetcher::new()?;
736        let url = format!("{}/simple.txt", mock_server.uri());
737
738        // Test simple fetch without cache headers
739        let (returned_content, sha256) = fetcher.fetch(&url).await?;
740
741        assert_eq!(returned_content, content);
742        assert!(!sha256.is_empty(), "SHA256 should be computed");
743
744        // Verify SHA256 is consistent
745        let expected_sha = calculate_sha256(content);
746        assert_eq!(sha256, expected_sha);
747
748        Ok(())
749    }
750
751    // Temporarily disabled - mock server setup needs adjustment
752    // #[tokio::test]
753    #[allow(dead_code)]
754    async fn test_fetch_with_both_etag_and_last_modified() -> anyhow::Result<()> {
755        // Setup mock server
756        let mock_server = MockServer::start().await;
757
758        // Mock response that checks both ETag and Last-Modified
759        Mock::given(method("GET"))
760            .and(path("/both.txt"))
761            .and(header("If-None-Match", "\"test-etag\""))
762            .and(header("If-Modified-Since", "Wed, 21 Oct 2015 07:28:00 GMT"))
763            .respond_with(ResponseTemplate::new(304))
764            .mount(&mock_server)
765            .await;
766
767        let fetcher = Fetcher::new()?;
768        let url = format!("{}/both.txt", mock_server.uri());
769
770        // Test with both cache headers
771        let result = fetcher
772            .fetch_with_cache(
773                &url,
774                Some("\"test-etag\""),
775                Some("Wed, 21 Oct 2015 07:28:00 GMT"),
776            )
777            .await?;
778
779        match result {
780            FetchResult::NotModified { .. } => {
781                // Expected result
782            },
783            _ => panic!("Expected NotModified result for matching cache headers"),
784        }
785
786        Ok(())
787    }
788
789    #[tokio::test]
790    async fn test_sha256_calculation() {
791        // Test the actual sha256 calculation with known values
792        let content = "Hello, World!";
793        let sha256 = calculate_sha256(content);
794
795        // The function returns base64-encoded SHA256
796        // Verify it's a valid base64 string of the right length
797        assert!(!sha256.is_empty());
798        assert_eq!(sha256.len(), 44); // Base64 encoded SHA256 is 44 chars
799
800        // Test empty string
801        let empty_sha256 = calculate_sha256("");
802        assert_eq!(empty_sha256, "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=");
803    }
804
805    #[tokio::test]
806    async fn test_check_flavors_empty_response() -> anyhow::Result<()> {
807        // Setup mock server that returns 404 for all flavors
808        let mock_server = MockServer::start().await;
809
810        // Mock 404 responses for all flavor checks
811        let flavors = [
812            "llms-full.txt",
813            "llms.txt",
814            "llms-mini.txt",
815            "llms-base.txt",
816        ];
817        for flavor in &flavors {
818            Mock::given(method("HEAD"))
819                .and(path(format!("/{flavor}")))
820                .respond_with(ResponseTemplate::new(404))
821                .mount(&mock_server)
822                .await;
823        }
824
825        let fetcher = Fetcher::new()?;
826        let url = format!("{}/llms.txt", mock_server.uri());
827
828        // Check flavors when none exist
829        let flavors = fetcher.check_flavors(&url).await?;
830
831        // Should return at least the original URL even if HEAD fails
832        assert_eq!(flavors.len(), 1);
833        assert_eq!(flavors[0].name, "llms.txt");
834        assert_eq!(flavors[0].size, None);
835
836        Ok(())
837    }
838
839    #[tokio::test]
840    async fn test_check_flavors_partial_availability() -> anyhow::Result<()> {
841        // Setup mock server with some flavors available
842        let mock_server = MockServer::start().await;
843
844        // Mock responses: full and regular available, mini and base not available
845        Mock::given(method("HEAD"))
846            .and(path("/llms-full.txt"))
847            .respond_with(ResponseTemplate::new(200).insert_header("content-length", "2048000"))
848            .mount(&mock_server)
849            .await;
850
851        Mock::given(method("HEAD"))
852            .and(path("/llms.txt"))
853            .respond_with(ResponseTemplate::new(200).insert_header("content-length", "1024000"))
854            .mount(&mock_server)
855            .await;
856
857        Mock::given(method("HEAD"))
858            .and(path("/llms-mini.txt"))
859            .respond_with(ResponseTemplate::new(404))
860            .mount(&mock_server)
861            .await;
862
863        Mock::given(method("HEAD"))
864            .and(path("/llms-base.txt"))
865            .respond_with(ResponseTemplate::new(404))
866            .mount(&mock_server)
867            .await;
868
869        let fetcher = Fetcher::new()?;
870        let url = format!("{}/llms.txt", mock_server.uri());
871
872        let flavors = fetcher.check_flavors(&url).await?;
873
874        // Should find 2 available flavors
875        assert_eq!(flavors.len(), 2);
876
877        // Should be sorted by preference
878        assert_eq!(flavors[0].name, "llms-full.txt");
879        assert_eq!(flavors[0].size, Some(2_048_000));
880
881        assert_eq!(flavors[1].name, "llms.txt");
882        assert_eq!(flavors[1].size, Some(1_024_000));
883
884        Ok(())
885    }
886
887    #[tokio::test]
888    async fn test_check_flavors_custom_filename() -> anyhow::Result<()> {
889        // Setup mock server
890        let mock_server = MockServer::start().await;
891
892        // Mock response for custom filename
893        Mock::given(method("HEAD"))
894            .and(path("/docs/llms-custom.txt"))
895            .respond_with(ResponseTemplate::new(200).insert_header("content-length", "512000"))
896            .mount(&mock_server)
897            .await;
898
899        // Mock 404 for standard flavors at this location
900        let standard_flavors = [
901            "llms-full.txt",
902            "llms.txt",
903            "llms-mini.txt",
904            "llms-base.txt",
905        ];
906        for flavor in &standard_flavors {
907            Mock::given(method("HEAD"))
908                .and(path(format!("/docs/{flavor}")))
909                .respond_with(ResponseTemplate::new(404))
910                .mount(&mock_server)
911                .await;
912        }
913
914        let fetcher = Fetcher::new()?;
915        let url = format!("{}/docs/llms-custom.txt", mock_server.uri());
916
917        let flavors = fetcher.check_flavors(&url).await?;
918
919        // Should include the custom flavor
920        assert!(!flavors.is_empty());
921        assert!(
922            flavors.iter().any(|f| f.name == "llms-custom.txt"),
923            "Should find custom flavor"
924        );
925
926        Ok(())
927    }
928
929    #[tokio::test]
930    async fn test_invalid_urls() -> anyhow::Result<()> {
931        let fetcher = Fetcher::new()?;
932
933        // Test completely invalid URLs
934        let invalid_urls = vec![
935            "not-a-url",
936            "ftp://invalid-protocol.com/llms.txt",
937            "",
938            "https://",
939        ];
940
941        for invalid_url in invalid_urls {
942            let result = fetcher.fetch_with_cache(invalid_url, None, None).await;
943            assert!(result.is_err(), "Invalid URL '{invalid_url}' should fail");
944        }
945
946        Ok(())
947    }
948
949    #[tokio::test]
950    async fn test_concurrent_requests() -> anyhow::Result<()> {
951        // Setup mock server
952        let mock_server = MockServer::start().await;
953
954        Mock::given(method("GET"))
955            .and(path("/concurrent.txt"))
956            .respond_with(ResponseTemplate::new(200).set_body_string("concurrent content"))
957            .mount(&mock_server)
958            .await;
959
960        let _fetcher = Fetcher::new()?;
961        let url = format!("{}/concurrent.txt", mock_server.uri());
962
963        // Make multiple concurrent requests
964        let mut handles = Vec::new();
965
966        for i in 0..10 {
967            let fetcher_clone = Fetcher::new()?;
968            let url_clone = url.clone();
969
970            handles.push(tokio::spawn(async move {
971                let result = fetcher_clone.fetch(&url_clone).await;
972                (i, result)
973            }));
974        }
975
976        // Wait for all requests
977        let results = futures::future::join_all(handles).await;
978
979        // All should succeed
980        for result in results {
981            let (index, fetch_result) = result.expect("Task should complete");
982
983            match fetch_result {
984                Ok((content, sha256)) => {
985                    assert_eq!(content, "concurrent content");
986                    assert!(!sha256.is_empty());
987                },
988                Err(e) => panic!("Request {index} should succeed: {e}"),
989            }
990        }
991
992        Ok(())
993    }
994}