1use crate::{Error, Result};
2use base64::{Engine, engine::general_purpose::STANDARD};
3use reqwest::header::{CONTENT_LENGTH, ETAG, IF_MODIFIED_SINCE, IF_NONE_MATCH, LAST_MODIFIED};
4use reqwest::{Client, StatusCode};
5use sha2::{Digest, Sha256};
6use std::time::Duration;
7use tracing::{debug, info};
8
9pub struct Fetcher {
11 client: Client,
12}
13
14impl Fetcher {
15 pub fn new() -> Result<Self> {
17 Self::with_timeout(Duration::from_secs(30))
18 }
19
20 pub fn with_timeout(timeout: Duration) -> Result<Self> {
22 let client = Client::builder()
23 .timeout(timeout)
24 .user_agent(concat!("outfitter-blz/", env!("CARGO_PKG_VERSION")))
25 .gzip(true)
26 .brotli(true)
27 .build()
28 .map_err(Error::Network)?;
29 Ok(Self { client })
30 }
31
32 pub async fn fetch_with_cache(
34 &self,
35 url: &str,
36 etag: Option<&str>,
37 last_modified: Option<&str>,
38 ) -> Result<FetchResult> {
39 let mut request = self.client.get(url);
40
41 if let Some(tag) = etag {
42 debug!("Setting If-None-Match: {}", tag);
43 request = request.header(IF_NONE_MATCH, tag);
44 }
45
46 if let Some(lm) = last_modified {
47 debug!("Setting If-Modified-Since: {}", lm);
48 request = request.header(IF_MODIFIED_SINCE, lm);
49 }
50
51 let response = request.send().await?;
52 let status = response.status();
53
54 if status == StatusCode::NOT_MODIFIED {
55 info!("Resource not modified (304) for {}", url);
56
57 let etag = response
59 .headers()
60 .get(ETAG)
61 .and_then(|v| v.to_str().ok())
62 .map(std::string::ToString::to_string);
63
64 let last_modified = response
65 .headers()
66 .get(LAST_MODIFIED)
67 .and_then(|v| v.to_str().ok())
68 .map(std::string::ToString::to_string);
69
70 return Ok(FetchResult::NotModified {
71 etag,
72 last_modified,
73 });
74 }
75
76 if !status.is_success() {
77 if status == StatusCode::NOT_FOUND {
79 return Err(Error::NotFound(format!(
80 "Resource not found at '{url}'. Check the URL or try 'blz lookup' to find available sources"
81 )));
82 }
83
84 match response.error_for_status() {
86 Ok(_) => unreachable!("Status should be an error"),
87 Err(err) => return Err(Error::Network(err)),
88 }
89 }
90
91 let new_etag = response
92 .headers()
93 .get(ETAG)
94 .and_then(|v| v.to_str().ok())
95 .map(std::string::ToString::to_string);
96
97 let new_last_modified = response
98 .headers()
99 .get(LAST_MODIFIED)
100 .and_then(|v| v.to_str().ok())
101 .map(std::string::ToString::to_string);
102
103 let content = response.text().await?;
104 let sha256 = calculate_sha256(&content);
105
106 info!("Fetched {} bytes from {}", content.len(), url);
107
108 Ok(FetchResult::Modified {
109 content,
110 etag: new_etag,
111 last_modified: new_last_modified,
112 sha256,
113 })
114 }
115
116 pub async fn fetch(&self, url: &str) -> Result<(String, String)> {
118 let response = self.client.get(url).send().await?;
119 let status = response.status();
120
121 if !status.is_success() {
122 if status == StatusCode::NOT_FOUND {
124 return Err(Error::NotFound(format!(
125 "Resource not found at '{url}'. Check the URL or try 'blz lookup' to find available sources"
126 )));
127 }
128
129 match response.error_for_status() {
131 Ok(_) => unreachable!("Status should be an error"),
132 Err(err) => return Err(Error::Network(err)),
133 }
134 }
135
136 let content = response.text().await?;
137 let sha256 = calculate_sha256(&content);
138
139 Ok((content, sha256))
140 }
141
142 pub async fn check_flavors(&self, url: &str) -> Result<Vec<FlavorInfo>> {
144 let mut flavors = Vec::new();
145 let base_url = extract_base_url(url);
146
147 let flavor_names = vec![
149 "llms-full.txt",
150 "llms.txt",
151 "llms-mini.txt",
152 "llms-base.txt",
153 ];
154
155 for flavor_name in flavor_names {
156 let flavor_url = format!("{base_url}/{flavor_name}");
157
158 match self.client.head(&flavor_url).send().await {
160 Ok(response) => {
161 if response.status().is_success() {
162 let size = response
163 .headers()
164 .get(CONTENT_LENGTH)
165 .and_then(|v| v.to_str().ok())
166 .and_then(|s| s.parse::<u64>().ok());
167
168 flavors.push(FlavorInfo {
169 name: flavor_name.to_string(),
170 size,
171 url: flavor_url,
172 });
173 }
174 },
175 Err(e) => {
176 debug!("Failed to check flavor {}: {}", flavor_name, e);
177 if url.ends_with(flavor_name) {
179 flavors.push(FlavorInfo {
180 name: flavor_name.to_string(),
181 size: None,
182 url: url.to_string(),
183 });
184 }
185 },
186 }
187 }
188
189 if let Some(filename) = url.split('/').next_back() {
191 let clean_filename = filename
193 .split('?')
194 .next()
195 .unwrap_or(filename)
196 .split('#')
197 .next()
198 .unwrap_or(filename);
199
200 if clean_filename.starts_with("llms")
201 && std::path::Path::new(clean_filename)
202 .extension()
203 .is_some_and(|ext| ext.eq_ignore_ascii_case("txt"))
204 && !flavors.iter().any(|f| f.name == filename)
205 {
206 flavors.push(FlavorInfo {
207 name: filename.to_string(),
208 size: None,
209 url: url.to_string(),
210 });
211 }
212 }
213
214 flavors.sort_by(|a, b| {
216 let order_a = match a.name.as_str() {
217 "llms-full.txt" => 0,
218 "llms.txt" => 1,
219 "llms-mini.txt" => 2,
220 "llms-base.txt" => 3,
221 _ => 4,
222 };
223 let order_b = match b.name.as_str() {
224 "llms-full.txt" => 0,
225 "llms.txt" => 1,
226 "llms-mini.txt" => 2,
227 "llms-base.txt" => 3,
228 _ => 4,
229 };
230 order_a.cmp(&order_b)
231 });
232
233 Ok(flavors)
234 }
235
236 pub async fn head_metadata(&self, url: &str) -> Result<HeadInfo> {
238 let response = self.client.head(url).send().await?;
239 let status = response.status();
240
241 let content_length = response
242 .headers()
243 .get(CONTENT_LENGTH)
244 .and_then(|v| v.to_str().ok())
245 .and_then(|s| s.parse::<u64>().ok());
246
247 let etag = response
248 .headers()
249 .get(ETAG)
250 .and_then(|v| v.to_str().ok())
251 .map(std::string::ToString::to_string);
252
253 let last_modified = response
254 .headers()
255 .get(LAST_MODIFIED)
256 .and_then(|v| v.to_str().ok())
257 .map(std::string::ToString::to_string);
258
259 Ok(HeadInfo {
260 status: status.as_u16(),
261 content_length,
262 etag,
263 last_modified,
264 })
265 }
266}
267
/// Metadata collected from a HEAD request (see `Fetcher::head_metadata`).
#[derive(Debug, Clone)]
pub struct HeadInfo {
    /// Numeric HTTP status code of the response.
    pub status: u16,
    /// Value of the `Content-Length` header, if present and parseable as `u64`.
    pub content_length: Option<u64>,
    /// Value of the `ETag` header, if present and readable as text.
    pub etag: Option<String>,
    /// Value of the `Last-Modified` header, if present and readable as text.
    pub last_modified: Option<String>,
}
280
/// Outcome of a conditional fetch (see `Fetcher::fetch_with_cache`).
#[derive(Debug, Clone)]
pub enum FetchResult {
    /// The server answered `304 Not Modified`; the cached copy is still current.
    NotModified {
        /// `ETag` header sent with the 304 response, if any.
        etag: Option<String>,
        /// `Last-Modified` header sent with the 304 response, if any.
        last_modified: Option<String>,
    },
    /// The server returned a fresh body.
    Modified {
        /// Response body as text.
        content: String,
        /// `ETag` header of the new response, if any.
        etag: Option<String>,
        /// `Last-Modified` header of the new response, if any.
        last_modified: Option<String>,
        /// Base64-encoded SHA-256 digest of `content`.
        sha256: String,
    },
}
302
/// One documentation flavor discovered by `Fetcher::check_flavors`.
#[derive(Debug, Clone)]
pub struct FlavorInfo {
    /// File name of the flavor, e.g. `llms-full.txt`.
    pub name: String,
    /// Size in bytes taken from `Content-Length`, or `None` when it is unknown.
    pub size: Option<u64>,
    /// URL the flavor can be fetched from.
    pub url: String,
}
313
314impl std::fmt::Display for FlavorInfo {
315 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
316 if let Some(size) = self.size {
317 write!(f, "{} ({})", self.name, format_size(size))
318 } else {
319 write!(f, "{}", self.name)
320 }
321 }
322}
323
324fn calculate_sha256(content: &str) -> String {
325 let mut hasher = Sha256::new();
326 hasher.update(content.as_bytes());
327 let result = hasher.finalize();
328 STANDARD.encode(result)
329}
330
/// Returns the "directory" part of `url`: everything before the last `/` of its path.
///
/// * Any query string (`?…`) or fragment (`#…`) is ignored, so a `/` inside them is never
///   mistaken for a path separator.
/// * If the last `/` belongs to the scheme separator (`://`), or there is no `/` at all,
///   the URL has no path and is returned as-is (minus query/fragment).
///
/// The result never ends with a trailing slash taken from the path, so callers can append
/// `"/<file>"` directly.
fn extract_base_url(url: &str) -> String {
    // Cut at the first '?' or '#': both are ASCII, so the slice boundary is always valid.
    let resource_end = url.find(['?', '#']).unwrap_or(url.len());
    let resource = &url[..resource_end];

    match resource.rfind('/') {
        // `ends_with` compares whole characters, so this cannot panic on multi-byte text
        // right before the slash, unlike slicing at a fixed byte offset.
        Some(last_slash) if !resource[..=last_slash].ends_with("://") => {
            resource[..last_slash].to_string()
        },
        _ => resource.to_string(),
    }
}
345
/// Formats a byte count for display using 1024-based units up to `GB`.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`); larger values are scaled and
/// printed with one decimal place (`"1.5 KB"`). There is no unit above `GB`, so very large
/// values are reported as a large number of gigabytes.
fn format_size(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB"];

    // Byte-sized values are printed exactly, without going through floating point.
    if bytes < 1024 {
        return format!("{} {}", bytes, UNITS[0]);
    }

    #[allow(clippy::cast_precision_loss)]
    let mut scaled = bytes as f64;
    let mut unit = UNITS[0];

    // Step up one unit at a time until the value fits or the largest unit is reached.
    for next_unit in &UNITS[1..] {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        unit = next_unit;
    }

    format!("{:.1} {}", scaled, unit)
}
363
#[cfg(test)]
// Test code is allowed to unwrap, panic and use wildcard arms for brevity.
#[allow(
    clippy::unwrap_used,
    clippy::panic,
    clippy::disallowed_macros,
    clippy::match_wildcard_for_single_variants
)]
mod tests {
    //! Unit tests for the pure helpers and mock-server tests for `Fetcher`.
    //!
    //! Network behaviour is exercised against a local `wiremock::MockServer`, so no
    //! external host is contacted except in `test_invalid_urls`, whose inputs are
    //! expected to fail.

    use super::*;
    use std::time::Duration;
    use wiremock::{
        Mock, MockServer, ResponseTemplate,
        matchers::{header, method, path},
    };

    /// Base URL extraction for the common shapes: file at root, nested file,
    /// trailing slash, and a bare origin.
    #[test]
    fn test_extract_base_url() {
        assert_eq!(
            extract_base_url("https://example.com/llms.txt"),
            "https://example.com"
        );
        assert_eq!(
            extract_base_url("https://api.example.com/v1/docs/llms.txt"),
            "https://api.example.com/v1/docs"
        );
        assert_eq!(
            extract_base_url("https://example.com/"),
            "https://example.com"
        );
        assert_eq!(
            extract_base_url("https://example.com"),
            "https://example.com"
        );
    }

    /// Base URL extraction with deep paths, query strings, fragments and URLs
    /// that have no path at all.
    #[test]
    fn test_extract_base_url_edge_cases() {
        // Deeply nested path.
        assert_eq!(
            extract_base_url("https://example.com/docs/api/v1/llms.txt"),
            "https://example.com/docs/api/v1"
        );

        // Query string after the file name.
        assert_eq!(
            extract_base_url("https://example.com/llms.txt?version=1"),
            "https://example.com"
        );

        // Fragment after the file name.
        assert_eq!(
            extract_base_url("https://example.com/docs/llms.txt#section"),
            "https://example.com/docs"
        );

        // No path: the only slashes belong to the scheme separator.
        assert_eq!(
            extract_base_url("https://example.com"),
            "https://example.com"
        );
        assert_eq!(extract_base_url("http://localhost"), "http://localhost");

        assert_eq!(extract_base_url("https://test.com"), "https://test.com");
    }

    /// Representative values for each unit.
    #[test]
    fn test_format_size() {
        assert_eq!(format_size(0), "0 B");
        assert_eq!(format_size(512), "512 B");
        assert_eq!(format_size(1024), "1.0 KB");
        assert_eq!(format_size(1536), "1.5 KB");
        assert_eq!(format_size(1_048_576), "1.0 MB");
        assert_eq!(format_size(1_572_864), "1.5 MB");
        assert_eq!(format_size(1_073_741_824), "1.0 GB");
        assert_eq!(format_size(2_147_483_648), "2.0 GB");
    }

    /// Values right at the unit boundaries, plus very large inputs.
    #[test]
    fn test_format_size_boundary_values() {
        assert_eq!(format_size(1), "1 B");
        assert_eq!(format_size(1023), "1023 B");
        assert_eq!(format_size(1025), "1.0 KB");
        // One byte short of a megabyte rounds up to "1024.0" at one decimal place
        // while still being reported in KB.
        assert_eq!(format_size(1024 * 1024 - 1), "1024.0 KB");
        assert_eq!(format_size(1024 * 1024 + 1), "1.0 MB");

        // 1 TiB: GB is the largest unit, so the value stays in GB.
        let huge_size = 1024u64 * 1024 * 1024 * 1024;
        let formatted = format_size(huge_size);
        assert!(formatted.contains("GB"));
        // The maximum value must format without panicking.
        let max_size = u64::MAX;
        let max_formatted = format_size(max_size);
        assert!(!max_formatted.is_empty());
    }

    /// `Display` output with and without a known size.
    #[test]
    fn test_flavor_info_display() {
        let flavor_with_size = FlavorInfo {
            name: "llms-full.txt".to_string(),
            size: Some(892_000),
            url: "https://example.com/llms-full.txt".to_string(),
        };
        assert_eq!(format!("{flavor_with_size}"), "llms-full.txt (871.1 KB)");

        let flavor_no_size = FlavorInfo {
            name: "llms.txt".to_string(),
            size: None,
            url: "https://example.com/llms.txt".to_string(),
        };
        assert_eq!(format!("{flavor_no_size}"), "llms.txt");
    }

    /// `Display` output across the B/KB/MB/GB units.
    #[test]
    fn test_flavor_info_display_various_sizes() {
        let test_cases = vec![
            (0, "llms.txt (0 B)"),
            (1024, "llms.txt (1.0 KB)"),
            (1_048_576, "llms.txt (1.0 MB)"),
            (1_073_741_824, "llms.txt (1.0 GB)"),
        ];

        for (size, expected) in test_cases {
            let flavor = FlavorInfo {
                name: "llms.txt".to_string(),
                size: Some(size),
                url: "https://example.com/llms.txt".to_string(),
            };
            assert_eq!(format!("{flavor}"), expected);
        }
    }

    /// Building a fetcher with the default settings succeeds.
    #[tokio::test]
    async fn test_fetcher_creation() {
        let result = Fetcher::new();
        assert!(result.is_ok(), "Fetcher creation should succeed");

        let _fetcher = result.unwrap();
    }

    /// A matching `If-None-Match` that the server answers with 304 yields `NotModified`.
    #[tokio::test]
    async fn test_fetch_with_etag_not_modified() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        // The mock only matches when the ETag is forwarded as `If-None-Match`.
        Mock::given(method("GET"))
            .and(path("/llms.txt"))
            .and(header("If-None-Match", "\"test-etag\""))
            .respond_with(ResponseTemplate::new(304))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/llms.txt", mock_server.uri());

        let result = fetcher
            .fetch_with_cache(&url, Some("\"test-etag\""), None)
            .await?;

        match result {
            FetchResult::NotModified { .. } => {
                // Expected outcome; nothing further to check.
            },
            _ => panic!("Expected NotModified result for matching ETag"),
        }

        Ok(())
    }

    /// A stale ETag that the server answers with 200 yields `Modified` carrying the
    /// new body, the new validators and a non-empty hash.
    #[tokio::test]
    async fn test_fetch_with_etag_modified() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        let content = "# Test Content\n\nThis is test content.";

        Mock::given(method("GET"))
            .and(path("/llms.txt"))
            .and(header("If-None-Match", "\"old-etag\""))
            .respond_with(
                ResponseTemplate::new(200)
                    .set_body_string(content)
                    .insert_header("etag", "\"new-etag\"")
                    .insert_header("last-modified", "Wed, 21 Oct 2015 07:28:00 GMT"),
            )
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/llms.txt", mock_server.uri());

        let result = fetcher
            .fetch_with_cache(&url, Some("\"old-etag\""), None)
            .await?;

        match result {
            FetchResult::Modified {
                content: returned_content,
                etag,
                last_modified,
                sha256,
            } => {
                assert_eq!(returned_content, content);
                assert_eq!(etag, Some("\"new-etag\"".to_string()));
                assert_eq!(
                    last_modified,
                    Some("Wed, 21 Oct 2015 07:28:00 GMT".to_string())
                );
                assert!(!sha256.is_empty(), "SHA256 should be computed");
            },
            _ => panic!("Expected Modified result for non-matching ETag"),
        }

        Ok(())
    }

    // NOTE(review): this function has no test attribute (only `allow(dead_code)`), so the
    // test harness never runs it. Presumably it was disabled on purpose; possibly the
    // comma inside the date value interferes with the `header` matcher. Confirm the
    // reason before re-enabling.
    /// Intended to check that a matching `If-Modified-Since` answered with 304 yields
    /// `NotModified`.
    #[allow(dead_code)]
    async fn test_fetch_with_last_modified() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/llms.txt"))
            .and(header("If-Modified-Since", "Wed, 21 Oct 2015 07:28:00 GMT"))
            .respond_with(ResponseTemplate::new(304))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/llms.txt", mock_server.uri());

        let result = fetcher
            .fetch_with_cache(&url, None, Some("Wed, 21 Oct 2015 07:28:00 GMT"))
            .await?;

        match result {
            FetchResult::NotModified { .. } => {
                // Expected outcome; nothing further to check.
            },
            _ => panic!("Expected NotModified result for matching Last-Modified"),
        }

        Ok(())
    }

    /// A 404 response maps to `Error::NotFound` with a message that points the user at
    /// `blz lookup`.
    #[tokio::test]
    async fn test_fetch_404_error() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/nonexistent.txt"))
            .respond_with(ResponseTemplate::new(404))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/nonexistent.txt", mock_server.uri());

        let result = fetcher.fetch_with_cache(&url, None, None).await;

        assert!(result.is_err(), "404 should result in error");

        match result {
            Err(Error::NotFound(msg)) => {
                assert!(msg.contains("not found"));
                assert!(msg.contains("blz lookup"));
            },
            Err(e) => panic!("Expected NotFound error, got: {e}"),
            Ok(_) => panic!("Expected error for 404 response"),
        }

        Ok(())
    }

    /// A 500 response maps to `Error::Network`.
    #[tokio::test]
    async fn test_fetch_500_error() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/error.txt"))
            .respond_with(ResponseTemplate::new(500))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/error.txt", mock_server.uri());

        let result = fetcher.fetch_with_cache(&url, None, None).await;

        assert!(result.is_err(), "500 should result in error");

        match result {
            Err(Error::Network(_)) => {
                // Expected outcome; nothing further to check.
            },
            Err(e) => panic!("Expected Network error, got: {e}"),
            Ok(_) => panic!("Expected error for 500 response"),
        }

        Ok(())
    }

    /// A response slower than the client timeout fails, and fails before the server
    /// would have answered.
    #[tokio::test]
    async fn test_fetch_timeout() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        // Server delays its answer by 500ms.
        Mock::given(method("GET"))
            .and(path("/slow.txt"))
            .respond_with(
                ResponseTemplate::new(200)
                    .set_body_string("slow content")
                    .set_delay(Duration::from_millis(500)),
            )
            .mount(&mock_server)
            .await;

        // Client gives up after 200ms.
        let fetcher = Fetcher::with_timeout(Duration::from_millis(200))?;
        let url = format!("{}/slow.txt", mock_server.uri());

        let start_time = std::time::Instant::now();
        let result = fetcher.fetch_with_cache(&url, None, None).await;
        let elapsed = start_time.elapsed();

        assert!(result.is_err(), "Slow request should timeout");
        // NOTE(review): wall-clock assertion; may be sensitive to a heavily loaded machine.
        assert!(
            elapsed < Duration::from_millis(500),
            "Should timeout before server's 500ms delay"
        );

        Ok(())
    }

    /// The unconditional `fetch` returns the body together with the hash that
    /// `calculate_sha256` produces for it.
    #[tokio::test]
    async fn test_fetch_simple_without_cache() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        let content = "# Simple Content\n\nThis is simple test content.";

        Mock::given(method("GET"))
            .and(path("/simple.txt"))
            .respond_with(ResponseTemplate::new(200).set_body_string(content))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/simple.txt", mock_server.uri());

        let (returned_content, sha256) = fetcher.fetch(&url).await?;

        assert_eq!(returned_content, content);
        assert!(!sha256.is_empty(), "SHA256 should be computed");

        let expected_sha = calculate_sha256(content);
        assert_eq!(sha256, expected_sha);

        Ok(())
    }

    // NOTE(review): like `test_fetch_with_last_modified`, this function has no test
    // attribute and is never run by the harness. Presumably disabled on purpose for the
    // same reason; confirm before re-enabling.
    /// Intended to check that sending both validators and receiving 304 yields
    /// `NotModified`.
    #[allow(dead_code)]
    async fn test_fetch_with_both_etag_and_last_modified() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/both.txt"))
            .and(header("If-None-Match", "\"test-etag\""))
            .and(header("If-Modified-Since", "Wed, 21 Oct 2015 07:28:00 GMT"))
            .respond_with(ResponseTemplate::new(304))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/both.txt", mock_server.uri());

        let result = fetcher
            .fetch_with_cache(
                &url,
                Some("\"test-etag\""),
                Some("Wed, 21 Oct 2015 07:28:00 GMT"),
            )
            .await?;

        match result {
            FetchResult::NotModified { .. } => {
                // Expected outcome; nothing further to check.
            },
            _ => panic!("Expected NotModified result for matching cache headers"),
        }

        Ok(())
    }

    /// The hash is a 44-character base64 string, and the empty input produces the
    /// known base64 SHA-256 digest of the empty string.
    #[tokio::test]
    async fn test_sha256_calculation() {
        let content = "Hello, World!";
        let sha256 = calculate_sha256(content);

        assert!(!sha256.is_empty());
        // 32 digest bytes encode to 44 base64 characters (with padding).
        assert_eq!(sha256.len(), 44);
        let empty_sha256 = calculate_sha256("");
        assert_eq!(empty_sha256, "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=");
    }

    /// When every probe answers 404, the requested file itself is still listed, with an
    /// unknown size.
    #[tokio::test]
    async fn test_check_flavors_empty_response() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        let flavors = [
            "llms-full.txt",
            "llms.txt",
            "llms-mini.txt",
            "llms-base.txt",
        ];
        for flavor in &flavors {
            Mock::given(method("HEAD"))
                .and(path(format!("/{flavor}")))
                .respond_with(ResponseTemplate::new(404))
                .mount(&mock_server)
                .await;
        }

        let fetcher = Fetcher::new()?;
        let url = format!("{}/llms.txt", mock_server.uri());

        let flavors = fetcher.check_flavors(&url).await?;

        // Only the fallback entry derived from the URL's own file name.
        assert_eq!(flavors.len(), 1);
        assert_eq!(flavors[0].name, "llms.txt");
        assert_eq!(flavors[0].size, None);

        Ok(())
    }

    /// Only flavors that answer with success are listed, in full-then-standard order,
    /// with sizes taken from `Content-Length`.
    #[tokio::test]
    async fn test_check_flavors_partial_availability() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("HEAD"))
            .and(path("/llms-full.txt"))
            .respond_with(ResponseTemplate::new(200).insert_header("content-length", "2048000"))
            .mount(&mock_server)
            .await;

        Mock::given(method("HEAD"))
            .and(path("/llms.txt"))
            .respond_with(ResponseTemplate::new(200).insert_header("content-length", "1024000"))
            .mount(&mock_server)
            .await;

        Mock::given(method("HEAD"))
            .and(path("/llms-mini.txt"))
            .respond_with(ResponseTemplate::new(404))
            .mount(&mock_server)
            .await;

        Mock::given(method("HEAD"))
            .and(path("/llms-base.txt"))
            .respond_with(ResponseTemplate::new(404))
            .mount(&mock_server)
            .await;

        let fetcher = Fetcher::new()?;
        let url = format!("{}/llms.txt", mock_server.uri());

        let flavors = fetcher.check_flavors(&url).await?;

        assert_eq!(flavors.len(), 2);

        assert_eq!(flavors[0].name, "llms-full.txt");
        assert_eq!(flavors[0].size, Some(2_048_000));

        assert_eq!(flavors[1].name, "llms.txt");
        assert_eq!(flavors[1].size, Some(1_024_000));

        Ok(())
    }

    /// A non-standard `llms-*.txt` file name in the requested URL is reported even
    /// though none of the standard flavors exist next to it.
    #[tokio::test]
    async fn test_check_flavors_custom_filename() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("HEAD"))
            .and(path("/docs/llms-custom.txt"))
            .respond_with(ResponseTemplate::new(200).insert_header("content-length", "512000"))
            .mount(&mock_server)
            .await;

        let standard_flavors = [
            "llms-full.txt",
            "llms.txt",
            "llms-mini.txt",
            "llms-base.txt",
        ];
        for flavor in &standard_flavors {
            Mock::given(method("HEAD"))
                .and(path(format!("/docs/{flavor}")))
                .respond_with(ResponseTemplate::new(404))
                .mount(&mock_server)
                .await;
        }

        let fetcher = Fetcher::new()?;
        let url = format!("{}/docs/llms-custom.txt", mock_server.uri());

        let flavors = fetcher.check_flavors(&url).await?;

        assert!(!flavors.is_empty());
        assert!(
            flavors.iter().any(|f| f.name == "llms-custom.txt"),
            "Should find custom flavor"
        );

        Ok(())
    }

    /// Malformed or unsupported URLs produce an error instead of a panic.
    #[tokio::test]
    async fn test_invalid_urls() -> anyhow::Result<()> {
        let fetcher = Fetcher::new()?;

        let invalid_urls = vec![
            "not-a-url",
            "ftp://invalid-protocol.com/llms.txt",
            "",
            "https://",
        ];

        for invalid_url in invalid_urls {
            let result = fetcher.fetch_with_cache(invalid_url, None, None).await;
            assert!(result.is_err(), "Invalid URL '{invalid_url}' should fail");
        }

        Ok(())
    }

    /// Ten fetches issued from separate tasks all succeed with the same body.
    #[tokio::test]
    async fn test_concurrent_requests() -> anyhow::Result<()> {
        let mock_server = MockServer::start().await;

        Mock::given(method("GET"))
            .and(path("/concurrent.txt"))
            .respond_with(ResponseTemplate::new(200).set_body_string("concurrent content"))
            .mount(&mock_server)
            .await;

        // NOTE(review): this instance is never used; each task below builds its own.
        let _fetcher = Fetcher::new()?;
        let url = format!("{}/concurrent.txt", mock_server.uri());

        let mut handles = Vec::new();

        for i in 0..10 {
            // A fresh fetcher per task, moved into the spawned future.
            let fetcher_clone = Fetcher::new()?;
            let url_clone = url.clone();

            handles.push(tokio::spawn(async move {
                let result = fetcher_clone.fetch(&url_clone).await;
                (i, result)
            }));
        }

        let results = futures::future::join_all(handles).await;

        for result in results {
            // The outer result is the task join result; the inner one is the fetch result.
            let (index, fetch_result) = result.expect("Task should complete");

            match fetch_result {
                Ok((content, sha256)) => {
                    assert_eq!(content, "concurrent content");
                    assert!(!sha256.is_empty());
                },
                Err(e) => panic!("Request {index} should succeed: {e}"),
            }
        }

        Ok(())
    }
}