geocog/
range_reader.rs

1//! Range-based reader interface for COG files
2//!
3//! This module provides a unified interface for reading byte ranges from various sources
4//! (local files, S3, HTTP). This is essential for efficient COG reading since COGs are
5//! designed to be read via HTTP Range requests.
6
7use std::fs::File;
8use std::io::{Read, Seek, SeekFrom};
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11
12use crate::tiff_utils::AnyResult;
13
14/// Trait for reading byte ranges from any source
15///
16/// This abstraction allows the same COG reading code to work with:
17/// - Local files (using seek + read)
18/// - S3 objects (using `GetObject` with Range header)
19/// - HTTP URLs (using Range header)
20pub trait RangeReader: Send + Sync {
21    /// Read a range of bytes from the source
22    fn read_range(&self, offset: u64, length: usize) -> AnyResult<Vec<u8>>;
23
24    /// Get the total size of the source in bytes
25    fn size(&self) -> u64;
26
27    /// Get a human-readable identifier for this source (for logging/errors)
28    fn identifier(&self) -> &str;
29
30    /// Check if this is a local file (fast random access) or remote (expensive reads)
31    fn is_local(&self) -> bool {
32        let id = self.identifier();
33        !id.starts_with("http://") && !id.starts_with("https://") && !id.starts_with("s3://")
34    }
35}
36
/// Range reader backed by a file on the local filesystem.
pub struct LocalRangeReader {
    /// Location of the file on disk.
    path: PathBuf,
    /// File length in bytes, as recorded when the reader was constructed.
    size: u64,
}
42
43impl LocalRangeReader {
44    pub fn new(path: impl AsRef<Path>) -> AnyResult<Self> {
45        let path = path.as_ref().to_path_buf();
46        let metadata = std::fs::metadata(&path)?;
47        Ok(Self {
48            path,
49            size: metadata.len(),
50        })
51    }
52}
53
54impl RangeReader for LocalRangeReader {
55    fn read_range(&self, offset: u64, length: usize) -> AnyResult<Vec<u8>> {
56        let mut file = File::open(&self.path)?;
57        file.seek(SeekFrom::Start(offset))?;
58        let mut buffer = vec![0u8; length];
59        file.read_exact(&mut buffer)?;
60        Ok(buffer)
61    }
62
63    fn size(&self) -> u64 {
64        self.size
65    }
66
67    fn identifier(&self) -> &str {
68        self.path.to_str().unwrap_or("<invalid path>")
69    }
70}
71
72/// HTTP range reader for remote COG files
73/// Uses reqwest with blocking client for simplicity in sync contexts
74pub struct HttpRangeReader {
75    url: String,
76    size: u64,
77    client: reqwest::blocking::Client,
78}
79
80impl HttpRangeReader {
81    pub fn new(url: &str) -> AnyResult<Self> {
82        let client = reqwest::blocking::Client::builder()
83            .timeout(std::time::Duration::from_secs(30))
84            .build()?;
85
86        // Get file size via HEAD request
87        let response = client.head(url).send()?;
88        let size = response
89            .headers()
90            .get("content-length")
91            .and_then(|v| v.to_str().ok())
92            .and_then(|v| v.parse().ok())
93            .unwrap_or(0);
94
95        Ok(Self {
96            url: url.to_string(),
97            size,
98            client,
99        })
100    }
101}
102
103impl RangeReader for HttpRangeReader {
104    fn read_range(&self, offset: u64, length: usize) -> AnyResult<Vec<u8>> {
105        let range = format!("bytes={}-{}", offset, offset + length as u64 - 1);
106        let response = self.client
107            .get(&self.url)
108            .header("Range", range)
109            .send()?;
110
111        if !response.status().is_success() {
112            return Err(format!("HTTP request failed: {}", response.status()).into());
113        }
114
115        Ok(response.bytes()?.to_vec())
116    }
117
118    fn size(&self) -> u64 {
119        self.size
120    }
121
122    fn identifier(&self) -> &str {
123        &self.url
124    }
125}
126
/// Range reader for objects stored in S3.
///
/// Reads are issued as plain HTTP range requests through reqwest, so this type
/// is only suitable for publicly readable objects.
pub struct S3RangeReader {
    /// Bucket name parsed from an `s3://` URL; empty when built from an HTTPS URL.
    // Stored for reference only, hence the `allow`.
    #[allow(dead_code)]
    bucket: String,
    /// Object key parsed from an `s3://` URL; empty when built from an HTTPS URL.
    // Stored for reference only, hence the `allow`.
    #[allow(dead_code)]
    key: String,
    /// Object size in bytes (0 when it has not been determined).
    size: u64,
    /// The URL exactly as passed to the constructor; used as the identifier.
    url: String,
}
137
138impl S3RangeReader {
139    /// Create from an S3 URL like <s3://bucket/key>
140    pub fn new(url: &str) -> AnyResult<Self> {
141        // Parse s3://bucket/key format
142        let url_parsed = url::Url::parse(url)?;
143
144        if url_parsed.scheme() != "s3" {
145            return Err("URL must use s3:// scheme".into());
146        }
147
148        let bucket = url_parsed.host_str()
149            .ok_or("Missing bucket in S3 URL")?
150            .to_string();
151
152        let key = url_parsed.path().trim_start_matches('/').to_string();
153
154        if key.is_empty() {
155            return Err("Missing key in S3 URL".into());
156        }
157
158        // For now, return a placeholder - actual S3 implementation would use aws-sdk-s3
159        // This is a simplified version that converts to HTTPS for public buckets
160        Ok(Self {
161            bucket,
162            key,
163            size: 0, // Would be fetched via HEAD
164            url: url.to_string(),
165        })
166    }
167
168    /// Create from an HTTPS URL pointing to S3-hosted content
169    pub fn from_https(url: &str) -> AnyResult<Self> {
170        // Use HTTP reader under the hood for public S3 buckets
171        let http_reader = HttpRangeReader::new(url)?;
172
173        Ok(Self {
174            bucket: String::new(),
175            key: String::new(),
176            size: http_reader.size,
177            url: url.to_string(),
178        })
179    }
180}
181
182impl RangeReader for S3RangeReader {
183    fn read_range(&self, offset: u64, length: usize) -> AnyResult<Vec<u8>> {
184        // For public S3 buckets, use HTTP range requests
185        // For private buckets, this would use aws-sdk-s3 with credentials
186        let client = reqwest::blocking::Client::new();
187        let range = format!("bytes={}-{}", offset, offset + length as u64 - 1);
188
189        let response = client
190            .get(&self.url)
191            .header("Range", range)
192            .send()?;
193
194        if !response.status().is_success() {
195            return Err(format!("S3 request failed: {}", response.status()).into());
196        }
197
198        Ok(response.bytes()?.to_vec())
199    }
200
201    fn size(&self) -> u64 {
202        self.size
203    }
204
205    fn identifier(&self) -> &str {
206        &self.url
207    }
208}
209
210/// Create a range reader from a path or URL
211pub fn create_range_reader(source: &str) -> AnyResult<Arc<dyn RangeReader>> {
212    if source.starts_with("s3://") {
213        // Use the proper S3 reader that supports credentials and custom endpoints
214        Ok(Arc::new(crate::s3::S3RangeReaderSync::new(source)?))
215    } else if source.starts_with("http://") || source.starts_with("https://") {
216        Ok(Arc::new(HttpRangeReader::new(source)?))
217    } else {
218        Ok(Arc::new(LocalRangeReader::new(source)?))
219    }
220}
221
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes a small temp file and checks the reported size plus two ranged reads.
    #[test]
    fn test_local_range_reader() {
        let mut tmp = NamedTempFile::new().unwrap();
        tmp.write_all(b"Hello, World!").unwrap();

        let reader = LocalRangeReader::new(tmp.path()).unwrap();
        assert_eq!(reader.size(), 13);

        // Range at the very start of the file.
        let greeting = reader.read_range(0, 5).unwrap();
        assert_eq!(&greeting, b"Hello");

        // Range in the middle of the file.
        let subject = reader.read_range(7, 5).unwrap();
        assert_eq!(&subject, b"World");
    }
}
242}