Skip to main content

pulith_fetch/fetch/
conditional.rs

1//! Conditional download functionality.
2//!
3//! This module provides the ability to conditionally download files based on
4//! ETag and Last-Modified headers, avoiding unnecessary downloads when the
5//! remote file hasn't changed.
6
7use std::path::{Path, PathBuf};
8use std::time::SystemTime;
9
10use crate::config::FetchOptions;
11use crate::error::{Error, Result};
12use crate::fetch::fetcher::Fetcher;
13use crate::net::http::HttpClient;
14
/// Metadata about a remote file, used to decide whether a download can be skipped.
///
/// Each field is `None` when the corresponding header value is not available.
/// [`Default`] yields a value with every field unset, and values can be
/// compared directly with `==`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RemoteMetadata {
    /// `ETag` header value, if present.
    pub etag: Option<String>,
    /// `Last-Modified` header value, if present.
    pub last_modified: Option<String>,
    /// `Content-Length` header value, if present.
    pub content_length: Option<u64>,
}
25
/// Options controlling a conditional download.
#[derive(Debug, Clone)]
pub struct ConditionalOptions {
    /// When `true`, download even if the stored metadata indicates that
    /// nothing changed.
    pub force: bool,
    /// When `true`, record the remote metadata after a successful download so
    /// that later calls can compare against it.
    pub store_metadata: bool,
}

impl Default for ConditionalOptions {
    /// Defaults: downloads are not forced, and metadata is stored.
    fn default() -> Self {
        let force = false;
        let store_metadata = true;
        Self {
            force,
            store_metadata,
        }
    }
}
43
44/// Conditional fetcher that checks ETag/Last-Modified before downloading.
45pub struct ConditionalFetcher<C: HttpClient> {
46    base_fetcher: Fetcher<C>,
47    metadata_dir: PathBuf,
48}
49
50impl<C: HttpClient + 'static> ConditionalFetcher<C> {
51    /// Create a new conditional fetcher.
52    pub fn new(client: C, workspace_root: impl Into<PathBuf>) -> Self {
53        let workspace_root = workspace_root.into();
54        Self {
55            base_fetcher: Fetcher::new(client, workspace_root.clone()),
56            metadata_dir: workspace_root.join(".metadata"),
57        }
58    }
59
60    /// Fetch a file conditionally based on ETag/Last-Modified.
61    pub async fn fetch_conditional(
62        &self,
63        url: &str,
64        destination: &Path,
65        options: FetchOptions,
66        conditional_options: ConditionalOptions,
67    ) -> Result<Option<PathBuf>> {
68        // Ensure metadata directory exists
69        tokio::fs::create_dir_all(&self.metadata_dir)
70            .await
71            .map_err(|e| Error::Network(e.to_string()))?;
72
73        // Get remote metadata
74        let remote_metadata = self.get_remote_metadata(url).await?;
75
76        // Check if we should skip download
77        if !conditional_options.force
78            && let Some(local_metadata) = self.load_local_metadata(url, destination).await?
79            && self.is_content_unchanged(&local_metadata, &remote_metadata)
80        {
81            return Ok(None); // Skip download
82        }
83
84        // Perform the download
85        let result = self
86            .base_fetcher
87            .fetch_with_receipt(url, destination, options)
88            .await;
89
90        match result {
91            Ok(receipt) => {
92                // Store metadata for future conditional requests
93                if conditional_options.store_metadata {
94                    let _ = self
95                        .store_metadata(url, destination, &remote_metadata)
96                        .await;
97                }
98                Ok(Some(receipt.destination))
99            }
100            Err(e) => Err(e),
101        }
102    }
103
104    /// Get metadata from remote server using HEAD request.
105    async fn get_remote_metadata(&self, url: &str) -> Result<RemoteMetadata> {
106        // This would need to be implemented in the HttpClient trait
107        // For now, we'll simulate with a basic implementation
108        let total_bytes = self
109            .base_fetcher
110            .head(url)
111            .await
112            .map_err(|e| Error::Network(e.to_string()))?;
113
114        Ok(RemoteMetadata {
115            etag: None,          // Would be parsed from HEAD response
116            last_modified: None, // Would be parsed from HEAD response
117            content_length: total_bytes,
118        })
119    }
120
121    /// Load stored metadata for a URL/destination pair.
122    async fn load_local_metadata(
123        &self,
124        url: &str,
125        destination: &Path,
126    ) -> Result<Option<RemoteMetadata>> {
127        let metadata_path = self.metadata_path(url, destination);
128
129        if !metadata_path.exists() {
130            return Ok(None);
131        }
132
133        let content = tokio::fs::read_to_string(&metadata_path)
134            .await
135            .map_err(|e| Error::Network(e.to_string()))?;
136
137        // Parse metadata (simplified - would use proper serialization)
138        Ok(Some(RemoteMetadata {
139            etag: None,
140            last_modified: None,
141            content_length: content.parse().ok(),
142        }))
143    }
144
145    /// Store metadata for future conditional requests.
146    async fn store_metadata(
147        &self,
148        url: &str,
149        destination: &Path,
150        metadata: &RemoteMetadata,
151    ) -> Result<()> {
152        let metadata_path = self.metadata_path(url, destination);
153
154        // Ensure metadata directory exists
155        tokio::fs::create_dir_all(&self.metadata_dir)
156            .await
157            .map_err(|e| Error::Network(e.to_string()))?;
158
159        // Store content length as simple text (would use proper serialization)
160        if let Some(content_length) = metadata.content_length {
161            tokio::fs::write(&metadata_path, content_length.to_string())
162                .await
163                .map_err(|e| Error::Network(e.to_string()))?;
164        }
165
166        Ok(())
167    }
168
169    /// Check if content has changed based on metadata.
170    fn is_content_unchanged(&self, local: &RemoteMetadata, remote: &RemoteMetadata) -> bool {
171        // Check ETag first (most reliable)
172        if let (Some(local_etag), Some(remote_etag)) = (&local.etag, &remote.etag) {
173            return local_etag == remote_etag;
174        }
175
176        // Fall back to Last-Modified
177        if let (Some(local_modified), Some(remote_modified)) =
178            (&local.last_modified, &remote.last_modified)
179        {
180            return local_modified == remote_modified;
181        }
182
183        // Fall back to Content-Length (least reliable)
184        if let (Some(local_length), Some(remote_length)) =
185            (local.content_length, remote.content_length)
186        {
187            return local_length == remote_length;
188        }
189
190        false // Default to downloading if we can't determine
191    }
192
193    /// Get the metadata file path for a URL/destination pair.
194    fn metadata_path(&self, url: &str, destination: &Path) -> PathBuf {
195        use std::collections::hash_map::DefaultHasher;
196        use std::hash::{Hash, Hasher};
197
198        // Create a unique filename from URL and destination
199        let mut hasher = DefaultHasher::new();
200        url.hash(&mut hasher);
201        destination.hash(&mut hasher);
202        let hash = hasher.finish();
203
204        self.metadata_dir
205            .join(format!("metadata_{:016x}.txt", hash))
206    }
207
208    /// Clean up old metadata files.
209    pub async fn cleanup_old_metadata(&self, max_age_seconds: u64) -> Result<usize> {
210        let mut cleaned = 0;
211        let _cutoff = SystemTime::now()
212            .duration_since(std::time::UNIX_EPOCH)
213            .unwrap_or_default()
214            .as_secs()
215            - max_age_seconds;
216
217        // Check if metadata directory exists
218        if !self.metadata_dir.exists() {
219            return Ok(0);
220        }
221
222        let mut entries = tokio::fs::read_dir(&self.metadata_dir)
223            .await
224            .map_err(|e| Error::Network(e.to_string()))?;
225
226        while let Some(entry) = entries
227            .next_entry()
228            .await
229            .map_err(|e| Error::Network(e.to_string()))?
230        {
231            let path = entry.path();
232
233            if path.extension().and_then(|s| s.to_str()) == Some("txt") {
234                if max_age_seconds == 0 {
235                    let _ = tokio::fs::remove_file(&path).await;
236                    cleaned += 1;
237                    continue;
238                }
239
240                let metadata = entry
241                    .metadata()
242                    .await
243                    .map_err(|e| Error::Network(e.to_string()))?;
244
245                if let Ok(modified) = metadata.modified()
246                    && let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH)
247                {
248                    // File is old if its modification time is before the cutoff
249                    // Since we're looking for files older than max_age_seconds,
250                    // we want files where (now - file_time) > max_age_seconds
251                    // Which means file_time < (now - max_age_seconds)
252                    let now = std::time::SystemTime::now()
253                        .duration_since(std::time::UNIX_EPOCH)
254                        .unwrap_or_default()
255                        .as_secs();
256                    if duration.as_secs() < (now - max_age_seconds) {
257                        let _ = tokio::fs::remove_file(&path).await;
258                        cleaned += 1;
259                    }
260                }
261            }
262        }
263
264        Ok(cleaned)
265    }
266}
267
268#[cfg(test)]
269mod tests {
270    use super::*;
271    use std::time::Duration;
272    use tempfile::TempDir;
273    use tokio::time::sleep;
274
275    /// Simple mock HTTP client for testing
276    #[derive(Debug)]
277    struct MockClient;
278
279    impl MockClient {
280        fn new() -> Self {
281            Self
282        }
283    }
284
285    #[derive(Debug)]
286    struct MockError(String);
287
288    impl std::fmt::Display for MockError {
289        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
290            write!(f, "{}", self.0)
291        }
292    }
293
294    impl std::error::Error for MockError {}
295
296    impl HttpClient for MockClient {
297        type Error = MockError;
298
299        async fn stream(
300            &self,
301            _url: &str,
302            _headers: &[(String, String)],
303        ) -> std::result::Result<
304            crate::net::http::BoxStream<'static, std::result::Result<bytes::Bytes, Self::Error>>,
305            Self::Error,
306        > {
307            let empty: crate::net::http::BoxStream<
308                'static,
309                std::result::Result<bytes::Bytes, Self::Error>,
310            > = Box::pin(futures_util::stream::empty());
311            Ok(empty)
312        }
313
314        async fn head(&self, _url: &str) -> std::result::Result<Option<u64>, Self::Error> {
315            Ok(Some(1024))
316        }
317    }
318
319    #[test]
320    fn test_remote_metadata() {
321        let metadata = RemoteMetadata {
322            etag: Some("\"abc123\"".to_string()),
323            last_modified: Some("Wed, 21 Oct 2015 07:28:00 GMT".to_string()),
324            content_length: Some(1024),
325        };
326
327        assert_eq!(metadata.etag, Some("\"abc123\"".to_string()));
328        assert_eq!(
329            metadata.last_modified,
330            Some("Wed, 21 Oct 2015 07:28:00 GMT".to_string())
331        );
332        assert_eq!(metadata.content_length, Some(1024));
333    }
334
335    #[test]
336    fn test_conditional_options_default() {
337        let options = ConditionalOptions::default();
338        assert!(!options.force);
339        assert!(options.store_metadata);
340    }
341
342    #[test]
343    fn test_is_content_unchanged() {
344        let fetcher = ConditionalFetcher::<MockClient>::new(
345            MockClient::new(),
346            TempDir::new().unwrap().path(),
347        );
348
349        // Test ETag comparison
350        let local = RemoteMetadata {
351            etag: Some("\"abc123\"".to_string()),
352            last_modified: None,
353            content_length: None,
354        };
355        let remote_same = RemoteMetadata {
356            etag: Some("\"abc123\"".to_string()),
357            last_modified: None,
358            content_length: None,
359        };
360        let remote_different = RemoteMetadata {
361            etag: Some("\"def456\"".to_string()),
362            last_modified: None,
363            content_length: None,
364        };
365
366        assert!(fetcher.is_content_unchanged(&local, &remote_same));
367        assert!(!fetcher.is_content_unchanged(&local, &remote_different));
368
369        // Test Last-Modified comparison
370        let local = RemoteMetadata {
371            etag: None,
372            last_modified: Some("Wed, 21 Oct 2015 07:28:00 GMT".to_string()),
373            content_length: None,
374        };
375        let remote_same = RemoteMetadata {
376            etag: None,
377            last_modified: Some("Wed, 21 Oct 2015 07:28:00 GMT".to_string()),
378            content_length: None,
379        };
380        let remote_different = RemoteMetadata {
381            etag: None,
382            last_modified: Some("Thu, 22 Oct 2015 07:28:00 GMT".to_string()),
383            content_length: None,
384        };
385
386        assert!(fetcher.is_content_unchanged(&local, &remote_same));
387        assert!(!fetcher.is_content_unchanged(&local, &remote_different));
388
389        // Test Content-Length comparison
390        let local = RemoteMetadata {
391            etag: None,
392            last_modified: None,
393            content_length: Some(1024),
394        };
395        let remote_same = RemoteMetadata {
396            etag: None,
397            last_modified: None,
398            content_length: Some(1024),
399        };
400        let remote_different = RemoteMetadata {
401            etag: None,
402            last_modified: None,
403            content_length: Some(2048),
404        };
405
406        assert!(fetcher.is_content_unchanged(&local, &remote_same));
407        assert!(!fetcher.is_content_unchanged(&local, &remote_different));
408    }
409
410    #[tokio::test]
411    async fn test_metadata_path() {
412        let temp_dir = TempDir::new().unwrap();
413        let fetcher = ConditionalFetcher::<MockClient>::new(MockClient::new(), temp_dir.path());
414
415        let url = "https://example.com/file.txt";
416        let destination = Path::new("/tmp/file.txt");
417
418        let path1 = fetcher.metadata_path(url, destination);
419        let path2 = fetcher.metadata_path(url, destination);
420        let path3 = fetcher.metadata_path("https://example.com/other.txt", destination);
421
422        // Same URL/destination should produce same path
423        assert_eq!(path1, path2);
424
425        // Different URL should produce different path
426        assert_ne!(path1, path3);
427
428        // Path should be in metadata directory
429        assert!(path1.starts_with(temp_dir.path().join(".metadata")));
430        assert!(
431            path1
432                .file_name()
433                .unwrap()
434                .to_str()
435                .unwrap()
436                .starts_with("metadata_")
437        );
438    }
439
440    #[tokio::test]
441    async fn test_store_and_load_metadata() {
442        let temp_dir = TempDir::new().unwrap();
443        let fetcher: ConditionalFetcher<MockClient> =
444            ConditionalFetcher::new(MockClient::new(), temp_dir.path());
445
446        let url = "https://example.com/file.txt";
447        let destination = Path::new("/tmp/file.txt");
448        let metadata = RemoteMetadata {
449            etag: Some("\"abc123\"".to_string()),
450            last_modified: Some("Wed, 21 Oct 2015 07:28:00 GMT".to_string()),
451            content_length: Some(1024),
452        };
453
454        // Store metadata
455        fetcher
456            .store_metadata(url, destination, &metadata)
457            .await
458            .unwrap();
459
460        // Load metadata
461        let loaded = fetcher.load_local_metadata(url, destination).await.unwrap();
462        assert!(loaded.is_some());
463
464        // Note: In real implementation, this would preserve all fields
465        // For now, we're only storing content_length
466        assert_eq!(loaded.unwrap().content_length, Some(1024));
467    }
468
469    #[tokio::test]
470    async fn test_cleanup_old_metadata() {
471        let temp_dir = TempDir::new().unwrap();
472        let fetcher: ConditionalFetcher<MockClient> =
473            ConditionalFetcher::new(MockClient::new(), temp_dir.path());
474
475        let url = "https://example.com/file.txt";
476        let destination = Path::new("/tmp/file.txt");
477        let metadata = RemoteMetadata {
478            etag: None,
479            last_modified: None,
480            content_length: Some(1024),
481        };
482
483        // Store metadata
484        fetcher
485            .store_metadata(url, destination, &metadata)
486            .await
487            .unwrap();
488
489        // Wait a bit to ensure time difference
490        sleep(Duration::from_millis(10)).await;
491
492        // Clean up with max age of 0 seconds (should clean up all files)
493        let cleaned = fetcher.cleanup_old_metadata(0).await.unwrap();
494
495        assert_eq!(cleaned, 1);
496    }
497}