Skip to main content

alimentar/backend/
http.rs

1//! HTTP/HTTPS storage backend (read-only).
2//!
3//! Provides read-only access to datasets hosted on HTTP/HTTPS servers.
4//! Useful for accessing public datasets without requiring cloud credentials.
5
6use bytes::Bytes;
7use reqwest::{
8    blocking::Client,
9    header::{CONTENT_LENGTH, RANGE},
10};
11
12use super::StorageBackend;
13use crate::error::{Error, Result};
14
15// ============================================================================
16// HTTP Client Trait for Dependency Injection and Testing
17// ============================================================================
18
19/// Response from an HTTP operation.
20#[derive(Debug, Clone)]
21pub struct HttpResponse {
22    /// HTTP status code.
23    pub status: u16,
24    /// Whether the status indicates success (2xx).
25    pub is_success: bool,
26    /// Response body bytes.
27    pub body: Bytes,
28    /// Content-Length header value, if present.
29    pub content_length: Option<u64>,
30    /// Accept-Ranges header value, if present.
31    pub accept_ranges: Option<String>,
32}
33
34impl HttpResponse {
35    /// Creates a successful response with the given body.
36    #[cfg(test)]
37    pub fn ok(body: impl Into<Bytes>) -> Self {
38        Self {
39            status: 200,
40            is_success: true,
41            body: body.into(),
42            content_length: None,
43            accept_ranges: None,
44        }
45    }
46
47    /// Creates a 404 Not Found response.
48    #[cfg(test)]
49    pub fn not_found() -> Self {
50        Self {
51            status: 404,
52            is_success: false,
53            body: Bytes::new(),
54            content_length: None,
55            accept_ranges: None,
56        }
57    }
58
59    /// Creates a 206 Partial Content response.
60    #[cfg(test)]
61    pub fn partial_content(body: impl Into<Bytes>) -> Self {
62        Self {
63            status: 206,
64            is_success: true,
65            body: body.into(),
66            content_length: None,
67            accept_ranges: Some("bytes".to_string()),
68        }
69    }
70
71    /// Sets the content length.
72    #[cfg(test)]
73    pub fn with_content_length(mut self, length: u64) -> Self {
74        self.content_length = Some(length);
75        self
76    }
77
78    /// Sets accept ranges.
79    #[cfg(test)]
80    pub fn with_accept_ranges(mut self, value: impl Into<String>) -> Self {
81        self.accept_ranges = Some(value.into());
82        self
83    }
84}
85
86/// Trait for HTTP client operations.
87///
88/// This trait abstracts HTTP operations to allow for testing with mock
89/// implementations. The real implementation uses reqwest, while tests can use
90/// `MockHttpClient`.
91pub trait HttpClient: Send + Sync {
92    /// Performs an HTTP GET request.
93    fn get(&self, url: &str) -> Result<HttpResponse>;
94
95    /// Performs an HTTP HEAD request.
96    fn head(&self, url: &str) -> Result<HttpResponse>;
97
98    /// Performs an HTTP GET request with a Range header.
99    fn get_range(&self, url: &str, start: u64, end: u64) -> Result<HttpResponse>;
100}
101
102/// Real HTTP client implementation using reqwest.
103#[derive(Debug)]
104pub struct ReqwestHttpClient {
105    client: Client,
106}
107
108impl ReqwestHttpClient {
109    /// Creates a new reqwest-based HTTP client.
110    pub fn new() -> Result<Self> {
111        let client = Client::builder()
112            .user_agent("alimentar/0.1.0")
113            .build()
114            .map_err(|e| Error::storage(format!("Failed to create HTTP client: {e}")))?;
115
116        Ok(Self { client })
117    }
118
119    /// Creates a new reqwest-based HTTP client with a timeout.
120    pub fn with_timeout(timeout_secs: u64) -> Result<Self> {
121        let client = Client::builder()
122            .user_agent("alimentar/0.1.0")
123            .timeout(std::time::Duration::from_secs(timeout_secs))
124            .build()
125            .map_err(|e| Error::storage(format!("Failed to create HTTP client: {e}")))?;
126
127        Ok(Self { client })
128    }
129}
130
131impl HttpClient for ReqwestHttpClient {
132    fn get(&self, url: &str) -> Result<HttpResponse> {
133        let response = self
134            .client
135            .get(url)
136            .send()
137            .map_err(|e| Error::storage(format!("HTTP GET error for '{}': {}", url, e)))?;
138
139        let status = response.status().as_u16();
140        let is_success = response.status().is_success();
141        let content_length = response
142            .headers()
143            .get(CONTENT_LENGTH)
144            .and_then(|h| h.to_str().ok())
145            .and_then(|s| s.parse().ok());
146        let accept_ranges = response
147            .headers()
148            .get("accept-ranges")
149            .and_then(|h| h.to_str().ok())
150            .map(|s| s.to_string());
151
152        let body = response
153            .bytes()
154            .map_err(|e| Error::storage(format!("Failed to read HTTP response body: {e}")))?;
155
156        Ok(HttpResponse {
157            status,
158            is_success,
159            body,
160            content_length,
161            accept_ranges,
162        })
163    }
164
165    fn head(&self, url: &str) -> Result<HttpResponse> {
166        let response = self
167            .client
168            .head(url)
169            .send()
170            .map_err(|e| Error::storage(format!("HTTP HEAD error for '{}': {}", url, e)))?;
171
172        let status = response.status().as_u16();
173        let is_success = response.status().is_success();
174        let content_length = response
175            .headers()
176            .get(CONTENT_LENGTH)
177            .and_then(|h| h.to_str().ok())
178            .and_then(|s| s.parse().ok());
179        let accept_ranges = response
180            .headers()
181            .get("accept-ranges")
182            .and_then(|h| h.to_str().ok())
183            .map(|s| s.to_string());
184
185        Ok(HttpResponse {
186            status,
187            is_success,
188            body: Bytes::new(),
189            content_length,
190            accept_ranges,
191        })
192    }
193
194    fn get_range(&self, url: &str, start: u64, end: u64) -> Result<HttpResponse> {
195        let range_header = format!("bytes={}-{}", start, end);
196
197        let response = self
198            .client
199            .get(url)
200            .header(RANGE, range_header)
201            .send()
202            .map_err(|e| Error::storage(format!("HTTP GET range error for '{}': {}", url, e)))?;
203
204        let status = response.status().as_u16();
205        let is_success = status == 206 || response.status().is_success();
206        let content_length = response
207            .headers()
208            .get(CONTENT_LENGTH)
209            .and_then(|h| h.to_str().ok())
210            .and_then(|s| s.parse().ok());
211        let accept_ranges = response
212            .headers()
213            .get("accept-ranges")
214            .and_then(|h| h.to_str().ok())
215            .map(|s| s.to_string());
216
217        let body = response
218            .bytes()
219            .map_err(|e| Error::storage(format!("Failed to read HTTP response body: {e}")))?;
220
221        Ok(HttpResponse {
222            status,
223            is_success,
224            body,
225            content_length,
226            accept_ranges,
227        })
228    }
229}
230
/// In-memory [`HttpClient`] used by tests.
///
/// Responses are registered per URL up front, so code under test can be
/// driven without any real HTTP traffic.
#[cfg(test)]
#[derive(Clone, Debug, Default)]
pub struct MockHttpClient {
    /// Canned `GET` responses, looked up by exact URL.
    get_responses: std::collections::HashMap<String, HttpResponse>,
    /// Canned `HEAD` responses, looked up by exact URL.
    head_responses: std::collections::HashMap<String, HttpResponse>,
    /// Fallback returned when a URL has no specific entry.
    default_response: Option<HttpResponse>,
}

#[cfg(test)]
impl MockHttpClient {
    /// Creates a mock with no registered responses and no fallback.
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Registers `response` as the answer to `GET url`.
    pub fn with_get_response(self, url: impl Into<String>, response: HttpResponse) -> Self {
        let mut client = self;
        client.get_responses.insert(url.into(), response);
        client
    }

    /// Registers `response` as the answer to `HEAD url`.
    pub fn with_head_response(self, url: impl Into<String>, response: HttpResponse) -> Self {
        let mut client = self;
        client.head_responses.insert(url.into(), response);
        client
    }

    /// Sets the fallback response used for URLs without a specific entry.
    pub fn with_default_response(self, response: HttpResponse) -> Self {
        Self {
            default_response: Some(response),
            ..self
        }
    }
}
271
#[cfg(test)]
impl HttpClient for MockHttpClient {
    /// Returns the registered `GET` response for `url`, else the fallback,
    /// else a storage error.
    fn get(&self, url: &str) -> Result<HttpResponse> {
        self.get_responses
            .get(url)
            .or(self.default_response.as_ref())
            .cloned()
            .ok_or_else(|| Error::storage(format!("No mock response for GET {}", url)))
    }

    /// Returns the registered `HEAD` response for `url`, else the fallback,
    /// else a storage error.
    fn head(&self, url: &str) -> Result<HttpResponse> {
        self.head_responses
            .get(url)
            .or(self.default_response.as_ref())
            .cloned()
            .ok_or_else(|| Error::storage(format!("No mock response for HEAD {}", url)))
    }

    /// Ignores the requested range and answers exactly like [`Self::get`].
    fn get_range(&self, url: &str, _start: u64, _end: u64) -> Result<HttpResponse> {
        // Range requests share the GET table to keep the mock simple.
        self.get(url)
    }
}
299
300/// A read-only storage backend using HTTP/HTTPS.
301///
302/// This backend is designed for accessing publicly hosted datasets
303/// over HTTP/HTTPS. It supports range requests for efficient partial
304/// reads when the server supports them.
305///
306/// # Limitations
307///
308/// - Read-only: `put` and `delete` operations will return errors
309/// - `list` is not supported (HTTP doesn't have directory listings)
310///
311/// # Example
312///
313/// ```no_run
314/// use alimentar::backend::{HttpBackend, StorageBackend};
315///
316/// let backend = HttpBackend::new("https://huggingface.co/datasets").unwrap();
317/// let data = backend.get("squad/train.parquet").unwrap();
318/// ```
319#[derive(Debug)]
320pub struct HttpBackend {
321    client: Client,
322    base_url: String,
323}
324
325impl HttpBackend {
326    /// Creates a new HTTP backend with the given base URL.
327    ///
328    /// # Arguments
329    ///
330    /// * `base_url` - Base URL for all requests. Keys will be appended to this.
331    ///
332    /// # Errors
333    ///
334    /// Returns an error if the HTTP client cannot be created.
335    pub fn new(base_url: impl Into<String>) -> Result<Self> {
336        let base_url = base_url.into();
337        let client = Client::builder()
338            .user_agent("alimentar/0.1.0")
339            .build()
340            .map_err(|e| Error::storage(format!("Failed to create HTTP client: {e}")))?;
341
342        Ok(Self { client, base_url })
343    }
344
345    /// Creates a new HTTP backend with custom client configuration.
346    ///
347    /// # Arguments
348    ///
349    /// * `base_url` - Base URL for all requests
350    /// * `timeout_secs` - Request timeout in seconds
351    ///
352    /// # Errors
353    ///
354    /// Returns an error if the HTTP client cannot be created.
355    pub fn with_timeout(base_url: impl Into<String>, timeout_secs: u64) -> Result<Self> {
356        let base_url = base_url.into();
357        let client = Client::builder()
358            .user_agent("alimentar/0.1.0")
359            .timeout(std::time::Duration::from_secs(timeout_secs))
360            .build()
361            .map_err(|e| Error::storage(format!("Failed to create HTTP client: {e}")))?;
362
363        Ok(Self { client, base_url })
364    }
365
366    /// Returns the base URL.
367    pub fn base_url(&self) -> &str {
368        &self.base_url
369    }
370
371    /// Constructs the full URL for a key.
372    fn url_for(&self, key: &str) -> String {
373        if self.base_url.ends_with('/') {
374            format!("{}{}", self.base_url, key)
375        } else {
376            format!("{}/{}", self.base_url, key)
377        }
378    }
379}
380
381impl StorageBackend for HttpBackend {
382    fn list(&self, _prefix: &str) -> Result<Vec<String>> {
383        // HTTP doesn't support directory listings
384        Err(Error::storage(
385            "HTTP backend does not support listing (use a specific key instead)",
386        ))
387    }
388
389    fn get(&self, key: &str) -> Result<Bytes> {
390        let url = self.url_for(key);
391
392        let response = self
393            .client
394            .get(&url)
395            .send()
396            .map_err(|e| Error::storage(format!("HTTP GET error for '{}': {}", url, e)))?;
397
398        if !response.status().is_success() {
399            return Err(Error::storage(format!(
400                "HTTP GET failed for '{}': status {}",
401                url,
402                response.status()
403            )));
404        }
405
406        let bytes = response
407            .bytes()
408            .map_err(|e| Error::storage(format!("Failed to read HTTP response body: {e}")))?;
409
410        Ok(bytes)
411    }
412
413    fn put(&self, key: &str, _data: Bytes) -> Result<()> {
414        Err(Error::storage(format!(
415            "HTTP backend is read-only, cannot write to '{}'",
416            key
417        )))
418    }
419
420    fn delete(&self, key: &str) -> Result<()> {
421        Err(Error::storage(format!(
422            "HTTP backend is read-only, cannot delete '{}'",
423            key
424        )))
425    }
426
427    fn exists(&self, key: &str) -> Result<bool> {
428        let url = self.url_for(key);
429
430        let response = self
431            .client
432            .head(&url)
433            .send()
434            .map_err(|e| Error::storage(format!("HTTP HEAD error for '{}': {}", url, e)))?;
435
436        Ok(response.status().is_success())
437    }
438
439    fn size(&self, key: &str) -> Result<u64> {
440        let url = self.url_for(key);
441
442        let response = self
443            .client
444            .head(&url)
445            .send()
446            .map_err(|e| Error::storage(format!("HTTP HEAD error for '{}': {}", url, e)))?;
447
448        if !response.status().is_success() {
449            return Err(Error::storage(format!(
450                "HTTP HEAD failed for '{}': status {}",
451                url,
452                response.status()
453            )));
454        }
455
456        // Try to get Content-Length header
457        if let Some(content_length) = response.headers().get(CONTENT_LENGTH) {
458            if let Ok(len_str) = content_length.to_str() {
459                if let Ok(len) = len_str.parse::<u64>() {
460                    return Ok(len);
461                }
462            }
463        }
464
465        Err(Error::storage(format!(
466            "Server did not provide Content-Length for '{}'",
467            url
468        )))
469    }
470}
471
472/// HTTP backend with support for partial/range requests.
473///
474/// This variant supports reading specific byte ranges, which is useful
475/// for large files when only a portion is needed.
476#[derive(Debug)]
477pub struct RangeHttpBackend {
478    inner: HttpBackend,
479}
480
481impl RangeHttpBackend {
482    /// Creates a new range-capable HTTP backend.
483    ///
484    /// # Errors
485    ///
486    /// Returns an error if the base URL is invalid.
487    pub fn new(base_url: impl Into<String>) -> Result<Self> {
488        Ok(Self {
489            inner: HttpBackend::new(base_url)?,
490        })
491    }
492
493    /// Reads a specific byte range from a key.
494    ///
495    /// # Arguments
496    ///
497    /// * `key` - The key to read from
498    /// * `start` - Starting byte offset (inclusive)
499    /// * `end` - Ending byte offset (inclusive)
500    ///
501    /// # Errors
502    ///
503    /// Returns an error if the request fails or the server doesn't support
504    /// ranges.
505    pub fn get_range(&self, key: &str, start: u64, end: u64) -> Result<Bytes> {
506        let url = self.inner.url_for(key);
507        let range_header = format!("bytes={}-{}", start, end);
508
509        let response = self
510            .inner
511            .client
512            .get(&url)
513            .header(RANGE, range_header)
514            .send()
515            .map_err(|e| Error::storage(format!("HTTP GET range error for '{}': {}", url, e)))?;
516
517        // 206 Partial Content is expected for range requests
518        if response.status().as_u16() != 206 && !response.status().is_success() {
519            return Err(Error::storage(format!(
520                "HTTP GET range failed for '{}': status {}",
521                url,
522                response.status()
523            )));
524        }
525
526        let bytes = response
527            .bytes()
528            .map_err(|e| Error::storage(format!("Failed to read HTTP response body: {e}")))?;
529
530        Ok(bytes)
531    }
532
533    /// Checks if the server supports range requests.
534    ///
535    /// # Errors
536    ///
537    /// Returns an error if the HTTP HEAD request fails.
538    pub fn supports_range(&self, key: &str) -> Result<bool> {
539        let url = self.inner.url_for(key);
540
541        let response = self
542            .inner
543            .client
544            .head(&url)
545            .send()
546            .map_err(|e| Error::storage(format!("HTTP HEAD error for '{}': {}", url, e)))?;
547
548        if !response.status().is_success() {
549            return Ok(false);
550        }
551
552        // Check for Accept-Ranges header
553        if let Some(accept_ranges) = response.headers().get("accept-ranges") {
554            if let Ok(value) = accept_ranges.to_str() {
555                return Ok(value != "none");
556            }
557        }
558
559        Ok(false)
560    }
561}
562
563impl StorageBackend for RangeHttpBackend {
564    fn list(&self, prefix: &str) -> Result<Vec<String>> {
565        self.inner.list(prefix)
566    }
567
568    fn get(&self, key: &str) -> Result<Bytes> {
569        self.inner.get(key)
570    }
571
572    fn put(&self, key: &str, data: Bytes) -> Result<()> {
573        self.inner.put(key, data)
574    }
575
576    fn delete(&self, key: &str) -> Result<()> {
577        self.inner.delete(key)
578    }
579
580    fn exists(&self, key: &str) -> Result<bool> {
581        self.inner.exists(key)
582    }
583
584    fn size(&self, key: &str) -> Result<u64> {
585        self.inner.size(key)
586    }
587}
588
589#[cfg(test)]
590mod tests {
591    use super::*;
592
593    #[test]
594    fn test_url_construction() {
595        let backend = HttpBackend::new("https://example.com/data")
596            .ok()
597            .unwrap_or_else(|| panic!("Should create backend"));
598        assert_eq!(
599            backend.url_for("file.txt"),
600            "https://example.com/data/file.txt"
601        );
602
603        let backend_slash = HttpBackend::new("https://example.com/data/")
604            .ok()
605            .unwrap_or_else(|| panic!("Should create backend"));
606        assert_eq!(
607            backend_slash.url_for("file.txt"),
608            "https://example.com/data/file.txt"
609        );
610    }
611
612    #[test]
613    fn test_base_url() {
614        let backend = HttpBackend::new("https://example.com")
615            .ok()
616            .unwrap_or_else(|| panic!("Should create backend"));
617        assert_eq!(backend.base_url(), "https://example.com");
618    }
619
620    #[test]
621    fn test_put_is_read_only() {
622        let backend = HttpBackend::new("https://example.com")
623            .ok()
624            .unwrap_or_else(|| panic!("Should create backend"));
625        let result = backend.put("test.txt", Bytes::from("data"));
626        assert!(result.is_err());
627    }
628
629    #[test]
630    fn test_delete_is_read_only() {
631        let backend = HttpBackend::new("https://example.com")
632            .ok()
633            .unwrap_or_else(|| panic!("Should create backend"));
634        let result = backend.delete("test.txt");
635        assert!(result.is_err());
636    }
637
638    #[test]
639    fn test_list_not_supported() {
640        let backend = HttpBackend::new("https://example.com")
641            .ok()
642            .unwrap_or_else(|| panic!("Should create backend"));
643        let result = backend.list("");
644        assert!(result.is_err());
645    }
646
647    #[test]
648    fn test_with_timeout() {
649        let backend = HttpBackend::with_timeout("https://example.com", 30);
650        assert!(backend.is_ok());
651    }
652
653    #[test]
654    fn test_range_http_backend_new() {
655        let backend = RangeHttpBackend::new("https://example.com");
656        assert!(backend.is_ok());
657    }
658
659    #[test]
660    fn test_range_http_backend_list_not_supported() {
661        let backend = RangeHttpBackend::new("https://example.com")
662            .ok()
663            .unwrap_or_else(|| panic!("Should create backend"));
664        let result = backend.list("");
665        assert!(result.is_err());
666    }
667
668    #[test]
669    fn test_range_http_backend_put_is_read_only() {
670        let backend = RangeHttpBackend::new("https://example.com")
671            .ok()
672            .unwrap_or_else(|| panic!("Should create backend"));
673        let result = backend.put("test.txt", Bytes::from("data"));
674        assert!(result.is_err());
675    }
676
677    #[test]
678    fn test_range_http_backend_delete_is_read_only() {
679        let backend = RangeHttpBackend::new("https://example.com")
680            .ok()
681            .unwrap_or_else(|| panic!("Should create backend"));
682        let result = backend.delete("test.txt");
683        assert!(result.is_err());
684    }
685
686    #[test]
687    fn test_url_construction_nested_path() {
688        let backend = HttpBackend::new("https://example.com/api/v1/data")
689            .ok()
690            .unwrap_or_else(|| panic!("Should create backend"));
691        assert_eq!(
692            backend.url_for("datasets/train.parquet"),
693            "https://example.com/api/v1/data/datasets/train.parquet"
694        );
695    }
696
697    #[test]
698    fn test_http_backend_debug() {
699        let backend = HttpBackend::new("https://example.com")
700            .ok()
701            .unwrap_or_else(|| panic!("Should create backend"));
702        let debug_str = format!("{:?}", backend);
703        assert!(debug_str.contains("HttpBackend"));
704        assert!(debug_str.contains("example.com"));
705    }
706
707    #[test]
708    fn test_range_http_backend_debug() {
709        let backend = RangeHttpBackend::new("https://example.com")
710            .ok()
711            .unwrap_or_else(|| panic!("Should create backend"));
712        let debug_str = format!("{:?}", backend);
713        assert!(debug_str.contains("RangeHttpBackend"));
714    }
715
716    // === Additional coverage tests for HTTP operations ===
717
718    #[test]
719    fn test_url_construction_empty_key() {
720        let backend = HttpBackend::new("https://example.com/data")
721            .ok()
722            .unwrap_or_else(|| panic!("Should create backend"));
723        assert_eq!(backend.url_for(""), "https://example.com/data/");
724    }
725
726    #[test]
727    fn test_url_construction_with_query_params() {
728        let backend = HttpBackend::new("https://example.com/data")
729            .ok()
730            .unwrap_or_else(|| panic!("Should create backend"));
731        assert_eq!(
732            backend.url_for("file.txt?version=1"),
733            "https://example.com/data/file.txt?version=1"
734        );
735    }
736
737    #[test]
738    fn test_put_error_message_contains_key() {
739        let backend = HttpBackend::new("https://example.com")
740            .ok()
741            .unwrap_or_else(|| panic!("Should create backend"));
742        let result = backend.put("my_file.txt", Bytes::from("data"));
743        let err = result.err().expect("Should be error");
744        let msg = format!("{:?}", err);
745        assert!(msg.contains("my_file.txt"));
746    }
747
748    #[test]
749    fn test_delete_error_message_contains_key() {
750        let backend = HttpBackend::new("https://example.com")
751            .ok()
752            .unwrap_or_else(|| panic!("Should create backend"));
753        let result = backend.delete("my_file.txt");
754        let err = result.err().expect("Should be error");
755        let msg = format!("{:?}", err);
756        assert!(msg.contains("my_file.txt"));
757    }
758
759    #[test]
760    fn test_list_error_message() {
761        let backend = HttpBackend::new("https://example.com")
762            .ok()
763            .unwrap_or_else(|| panic!("Should create backend"));
764        let result = backend.list("prefix/");
765        let err = result.err().expect("Should be error");
766        let msg = format!("{:?}", err);
767        assert!(msg.contains("directory") || msg.contains("listing"));
768    }
769
770    #[test]
771    fn test_with_timeout_zero() {
772        // Zero timeout should still create a valid backend
773        let backend = HttpBackend::with_timeout("https://example.com", 0);
774        assert!(backend.is_ok());
775    }
776
777    #[test]
778    fn test_with_timeout_large() {
779        let backend = HttpBackend::with_timeout("https://example.com", 3600);
780        assert!(backend.is_ok());
781    }
782
783    #[test]
784    fn test_base_url_with_trailing_slash() {
785        let backend = HttpBackend::new("https://example.com/path/")
786            .ok()
787            .unwrap_or_else(|| panic!("Should create backend"));
788        assert_eq!(backend.base_url(), "https://example.com/path/");
789    }
790
791    #[test]
792    fn test_range_http_backend_get_delegates() {
793        // Test that RangeHttpBackend.get delegates to inner
794        let backend = RangeHttpBackend::new("https://httpbin.org")
795            .ok()
796            .unwrap_or_else(|| panic!("Should create backend"));
797        // We can't test actual HTTP calls without a server, but we can verify
798        // the delegation path is exercised
799        let result = backend.get("nonexistent-file.txt");
800        // This will fail because no server, but exercises the code path
801        assert!(result.is_err());
802    }
803
804    #[test]
805    fn test_range_http_backend_exists_delegates() {
806        let backend = RangeHttpBackend::new("https://httpbin.org")
807            .ok()
808            .unwrap_or_else(|| panic!("Should create backend"));
809        let result = backend.exists("nonexistent-file.txt");
810        // Either error (network) or false (not found)
811        match result {
812            Ok(exists) => assert!(!exists),
813            Err(_) => {} // Network error is acceptable
814        }
815    }
816
817    #[test]
818    fn test_range_http_backend_size_delegates() {
819        let backend = RangeHttpBackend::new("https://httpbin.org")
820            .ok()
821            .unwrap_or_else(|| panic!("Should create backend"));
822        let result = backend.size("nonexistent-file.txt");
823        // Will fail - exercises code path
824        assert!(result.is_err());
825    }
826
827    #[test]
828    fn test_url_construction_special_chars() {
829        let backend = HttpBackend::new("https://example.com")
830            .ok()
831            .unwrap_or_else(|| panic!("Should create backend"));
832        // URL with spaces (should be encoded by caller)
833        assert_eq!(
834            backend.url_for("file%20name.txt"),
835            "https://example.com/file%20name.txt"
836        );
837    }
838
839    #[test]
840    fn test_url_construction_unicode() {
841        let backend = HttpBackend::new("https://example.com")
842            .ok()
843            .unwrap_or_else(|| panic!("Should create backend"));
844        assert_eq!(
845            backend.url_for("données.txt"),
846            "https://example.com/données.txt"
847        );
848    }
849
850    #[test]
851    fn test_multiple_backends_independent() {
852        let backend1 = HttpBackend::new("https://example1.com")
853            .ok()
854            .unwrap_or_else(|| panic!("Should create backend"));
855        let backend2 = HttpBackend::new("https://example2.com")
856            .ok()
857            .unwrap_or_else(|| panic!("Should create backend"));
858
859        assert_eq!(backend1.base_url(), "https://example1.com");
860        assert_eq!(backend2.base_url(), "https://example2.com");
861    }
862
863    #[test]
864    fn test_range_backend_delegation_put() {
865        let backend = RangeHttpBackend::new("https://example.com")
866            .ok()
867            .unwrap_or_else(|| panic!("Should create backend"));
868        // Should delegate to inner.put which returns read-only error
869        let result = backend.put("test.txt", Bytes::from("data"));
870        assert!(result.is_err());
871    }
872
873    #[test]
874    fn test_range_backend_delegation_delete() {
875        let backend = RangeHttpBackend::new("https://example.com")
876            .ok()
877            .unwrap_or_else(|| panic!("Should create backend"));
878        // Should delegate to inner.delete which returns read-only error
879        let result = backend.delete("test.txt");
880        assert!(result.is_err());
881    }
882
883    #[test]
884    fn test_range_backend_delegation_list() {
885        let backend = RangeHttpBackend::new("https://example.com")
886            .ok()
887            .unwrap_or_else(|| panic!("Should create backend"));
888        // Should delegate to inner.list which returns not-supported error
889        let result = backend.list("");
890        assert!(result.is_err());
891    }
892
893    // === Additional HTTP backend tests ===
894
895    #[test]
896    fn test_http_backend_url_with_port() {
897        let backend = HttpBackend::new("https://example.com:8080/api")
898            .ok()
899            .unwrap_or_else(|| panic!("Should create backend"));
900        assert_eq!(
901            backend.url_for("data.json"),
902            "https://example.com:8080/api/data.json"
903        );
904    }
905
906    #[test]
907    fn test_http_backend_url_with_path_segments() {
908        let backend = HttpBackend::new("https://cdn.example.com/v1/datasets")
909            .ok()
910            .unwrap_or_else(|| panic!("Should create backend"));
911        assert_eq!(
912            backend.url_for("train/data.parquet"),
913            "https://cdn.example.com/v1/datasets/train/data.parquet"
914        );
915    }
916
917    #[test]
918    fn test_http_backend_list_error_contains_context() {
919        let backend = HttpBackend::new("https://example.com")
920            .ok()
921            .unwrap_or_else(|| panic!("Should create backend"));
922        let result = backend.list("some/prefix");
923        assert!(result.is_err());
924        if let Err(e) = result {
925            let msg = format!("{:?}", e);
926            assert!(msg.contains("listing") || msg.contains("directory"));
927        }
928    }
929
/// `put` must fail, and the error's debug text must mention either the key
/// or the phrase "read-only".
#[test]
fn test_http_backend_put_error_includes_key() {
    let backend = match HttpBackend::new("https://example.com") {
        Ok(created) => created,
        Err(_) => panic!("Should create backend"),
    };
    let outcome = backend.put("path/to/file.txt", Bytes::from("content"));
    assert!(outcome.is_err());
    if let Some(err) = outcome.err() {
        let rendered = format!("{:?}", err);
        let names_key = rendered.contains("path/to/file.txt");
        let names_read_only = rendered.contains("read-only");
        assert!(names_key || names_read_only);
    }
}
942
/// `delete` must fail, and the error's debug text must mention either the
/// key or the phrase "read-only".
#[test]
fn test_http_backend_delete_error_includes_key() {
    let backend = match HttpBackend::new("https://example.com") {
        Ok(created) => created,
        Err(_) => panic!("Should create backend"),
    };
    let outcome = backend.delete("path/to/file.txt");
    assert!(outcome.is_err());
    if let Some(err) = outcome.err() {
        let rendered = format!("{:?}", err);
        let names_key = rendered.contains("path/to/file.txt");
        let names_read_only = rendered.contains("read-only");
        assert!(names_key || names_read_only);
    }
}
955
/// `RangeHttpBackend::new` accepts several base URL shapes: bare host,
/// trailing slash, with a path, and plain-HTTP localhost with a port.
#[test]
fn test_range_http_backend_creation_variations() {
    let base_urls = [
        "https://example.com",
        "https://example.com/",
        "https://example.com/path",
        "http://localhost:3000",
    ];
    for &base in &base_urls {
        assert!(RangeHttpBackend::new(base).is_ok());
    }
}
964
/// `HttpBackend::with_timeout` succeeds for a very short, a medium and a
/// long timeout value.
#[test]
fn test_http_backend_with_timeout_variations() {
    let very_short = HttpBackend::with_timeout("https://example.com", 1);
    let medium = HttpBackend::with_timeout("https://example.com", 30);
    let long = HttpBackend::with_timeout("https://example.com", 600);

    assert!(very_short.is_ok());
    assert!(medium.is_ok());
    assert!(long.is_ok());
}
974
/// Edge cases for `url_for` when the base URL ends with a slash.
#[test]
fn test_http_backend_url_construction_edge_cases() {
    let backend = match HttpBackend::new("https://example.com/") {
        Ok(created) => created,
        Err(_) => panic!("backend"),
    };

    // A trailing slash on the base must not yield a doubled slash before
    // the key.
    let plain_key_url = backend.url_for("file.txt");
    let has_double_slash = plain_key_url.contains("//file");
    assert!(!has_double_slash);

    // For a key with a leading slash, only the presence of the file name is
    // checked; the exact URL shape is not pinned by this test.
    let slashed_key_url = backend.url_for("/file.txt");
    assert!(slashed_key_url.contains("file.txt"));
}
990
/// `base_url()` returns exactly the string handed to `HttpBackend::new`,
/// with or without a trailing slash.
#[test]
fn test_http_backend_base_url_preserved() {
    let candidates = [
        "https://example.com",
        "https://example.com/",
        "https://example.com/api/v1",
        "https://example.com/api/v1/",
        "http://localhost:8080",
    ];

    for &candidate in &candidates {
        let backend = match HttpBackend::new(candidate) {
            Ok(created) => created,
            Err(_) => panic!("Should create backend for {}", candidate),
        };
        assert_eq!(backend.base_url(), candidate);
    }
}
1008
/// `put` on a `RangeHttpBackend` must fail, and the error's debug text must
/// mention "read-only" or the key.
#[test]
fn test_range_http_backend_put_delegates_error() {
    let backend = match RangeHttpBackend::new("https://example.com") {
        Ok(created) => created,
        Err(_) => panic!("backend"),
    };

    let outcome = backend.put("any/path.txt", Bytes::from("data"));
    assert!(outcome.is_err());

    // The error is expected to signal a read-only backend.
    if let Some(err) = outcome.err() {
        let rendered = format!("{:?}", err);
        let acceptable = ["read-only", "any/path.txt"]
            .iter()
            .any(|needle| rendered.contains(*needle));
        assert!(acceptable);
    }
}
1024
/// `delete` on a `RangeHttpBackend` must fail, and the error's debug text
/// must mention "read-only" or the key.
#[test]
fn test_range_http_backend_delete_delegates_error() {
    let backend = match RangeHttpBackend::new("https://example.com") {
        Ok(created) => created,
        Err(_) => panic!("backend"),
    };

    let outcome = backend.delete("some/file.parquet");
    assert!(outcome.is_err());

    if let Some(err) = outcome.err() {
        let rendered = format!("{:?}", err);
        let acceptable = ["read-only", "some/file.parquet"]
            .iter()
            .any(|needle| rendered.contains(*needle));
        assert!(acceptable);
    }
}
1039
/// `list` on a `RangeHttpBackend` must fail, and the error's debug text
/// must mention "listing" or "directory".
#[test]
fn test_range_http_backend_list_delegates_error() {
    let backend = match RangeHttpBackend::new("https://example.com") {
        Ok(created) => created,
        Err(_) => panic!("backend"),
    };

    let outcome = backend.list("prefix/");
    assert!(outcome.is_err());

    if let Some(err) = outcome.err() {
        let rendered = format!("{:?}", err);
        let acceptable = ["listing", "directory"]
            .iter()
            .any(|needle| rendered.contains(*needle));
        assert!(acceptable);
    }
}
1054
/// A `#fragment` inside the key is carried into the resolved URL verbatim.
#[test]
fn test_http_backend_url_for_with_fragment() {
    let backend = match HttpBackend::new("https://example.com") {
        Ok(created) => created,
        Err(_) => panic!("backend"),
    };

    // Fragments are unusual for data files, but the key is not altered.
    let resolved = backend.url_for("file.txt#section");
    assert_eq!(resolved, "https://example.com/file.txt#section");
}
1065
1066    // ========================================================================
1067    // Mock HTTP Client Tests
1068    // ========================================================================
1069
/// A GET response registered for a URL is returned for that URL.
#[test]
fn test_mock_http_client_get_response() {
    let client = MockHttpClient::new()
        .with_get_response("https://example.com/data.txt", HttpResponse::ok("hello"));

    let reply = client.get("https://example.com/data.txt").unwrap();
    assert!(reply.is_success);
    assert_eq!(reply.status, 200);
    assert_eq!(reply.body, Bytes::from("hello"));
}
1080
/// A HEAD response registered for a URL is returned with its content length.
#[test]
fn test_mock_http_client_head_response() {
    let canned = HttpResponse::ok(Bytes::new()).with_content_length(1024);
    let client = MockHttpClient::new().with_head_response("https://example.com/file.txt", canned);

    let reply = client.head("https://example.com/file.txt").unwrap();
    assert!(reply.is_success);
    assert_eq!(reply.content_length, Some(1024));
}
1092
/// With only a default response configured, a GET for an unregistered URL
/// yields that default (here a 404).
#[test]
fn test_mock_http_client_default_response() {
    let fallback = HttpResponse::not_found();
    let client = MockHttpClient::new().with_default_response(fallback);

    let reply = client.get("https://any-url.com/anything").unwrap();
    assert!(!reply.is_success);
    assert_eq!(reply.status, 404);
}
1101
/// A mock with no registered and no default response returns an error.
#[test]
fn test_mock_http_client_no_response_error() {
    let client = MockHttpClient::new();

    let outcome = client.get("https://example.com/missing");
    assert!(outcome.is_err());
}
1109
/// `get_range` on the mock returns the response registered via
/// `with_get_response` for the same URL (here a 206 Partial Content).
#[test]
fn test_mock_http_client_get_range() {
    let canned = HttpResponse::partial_content("partial content");
    let client = MockHttpClient::new().with_get_response("https://example.com/large.bin", canned);

    let reply = client
        .get_range("https://example.com/large.bin", 0, 100)
        .unwrap();
    assert!(reply.is_success);
    assert_eq!(reply.status, 206);
}
1123
/// The `with_content_length` and `with_accept_ranges` builders set the
/// corresponding optional fields.
#[test]
fn test_http_response_builder_methods() {
    let built = HttpResponse::ok("test")
        .with_content_length(100)
        .with_accept_ranges("bytes");

    assert_eq!(built.content_length, Some(100));
    assert_eq!(built.accept_ranges.as_deref(), Some("bytes"));
}
1133
/// A cloned mock still serves the responses registered on the original.
#[test]
fn test_mock_http_client_clone() {
    let original =
        MockHttpClient::new().with_get_response("https://example.com/a", HttpResponse::ok("a"));

    let duplicate = original.clone();
    let reply = duplicate.get("https://example.com/a").unwrap();
    assert!(reply.is_success);
}
1143
/// The `Debug` output of `HttpResponse` contains the type name.
#[test]
fn test_http_response_debug() {
    let rendered = format!("{:?}", HttpResponse::ok("test"));
    assert!(rendered.contains("HttpResponse"));
}
1150
1151    // ========================================================================
1152    // ReqwestHttpClient Tests
1153    // ========================================================================
1154
/// `ReqwestHttpClient::new` succeeds.
#[test]
fn test_reqwest_http_client_new() {
    let built = ReqwestHttpClient::new();
    assert!(built.is_ok());
}
1160
/// `ReqwestHttpClient::with_timeout` succeeds for a 30-unit timeout.
#[test]
fn test_reqwest_http_client_with_timeout() {
    let built = ReqwestHttpClient::with_timeout(30);
    assert!(built.is_ok());
    // The timeout itself is not observable here; unwrapping only confirms
    // that a usable client value was produced.
    let _ready = built.unwrap();
}
1168
/// The `Debug` output of `ReqwestHttpClient` contains the type name.
#[test]
fn test_reqwest_http_client_debug() {
    let rendered = format!("{:?}", ReqwestHttpClient::new().unwrap());
    assert!(rendered.contains("ReqwestHttpClient"));
}
1175
1176    // ========================================================================
1177    // HttpResponse Tests
1178    // ========================================================================
1179
/// `HttpResponse::not_found` is a non-success 404 with an empty body.
#[test]
fn test_http_response_not_found() {
    let missing = HttpResponse::not_found();
    assert_eq!(missing.status, 404);
    assert!(!missing.is_success);
    assert!(missing.body.is_empty());
}
1187
/// `HttpResponse::partial_content` is a successful 206 that keeps the body
/// and advertises `bytes` in `accept_ranges`.
#[test]
fn test_http_response_partial_content() {
    let partial = HttpResponse::partial_content("partial");
    assert_eq!(partial.status, 206);
    assert!(partial.is_success);
    assert_eq!(partial.body, Bytes::from("partial"));
    assert_eq!(partial.accept_ranges.as_deref(), Some("bytes"));
}
1196
/// Cloning an `HttpResponse` preserves status, body and content length.
#[test]
fn test_http_response_clone() {
    let original = HttpResponse::ok("test").with_content_length(100);
    let duplicate = original.clone();
    assert_eq!(duplicate.status, original.status);
    assert_eq!(duplicate.body, original.body);
    assert_eq!(duplicate.content_length, original.content_length);
}
1205
1206    // ========================================================================
1207    // Mock Client Integration Tests
1208    // ========================================================================
1209
/// GET responses registered for different URLs are served independently.
#[test]
fn test_mock_client_multiple_urls() {
    let client = MockHttpClient::new()
        .with_get_response("https://a.com/1", HttpResponse::ok("first"))
        .with_get_response("https://b.com/2", HttpResponse::ok("second"));

    let from_a = client.get("https://a.com/1").unwrap();
    let from_b = client.get("https://b.com/2").unwrap();

    assert_eq!(from_a.body, Bytes::from("first"));
    assert_eq!(from_b.body, Bytes::from("second"));
}
1222
/// GET and HEAD responses registered for the same URL do not shadow each
/// other: each verb returns its own canned response.
#[test]
fn test_mock_client_head_uses_own_map() {
    let head_reply = HttpResponse::ok(Bytes::new()).with_content_length(7);
    let client = MockHttpClient::new()
        .with_get_response("https://example.com/file", HttpResponse::ok("content"))
        .with_head_response("https://example.com/file", head_reply);

    let fetched = client.get("https://example.com/file").unwrap();
    let probed = client.head("https://example.com/file").unwrap();

    assert_eq!(fetched.body, Bytes::from("content"));
    assert_eq!(probed.content_length, Some(7));
}
1238
/// With only a default response configured, HEAD for an unregistered URL
/// yields that default.
#[test]
fn test_mock_client_default_fallback_for_head() {
    let fallback = HttpResponse::ok(Bytes::new()).with_content_length(999);
    let client = MockHttpClient::new().with_default_response(fallback);

    let reply = client.head("https://any.com/file").unwrap();
    assert_eq!(reply.content_length, Some(999));
}
1247}