geocog/
cog_reader.rs

1//! Pure Rust COG (Cloud Optimized `GeoTIFF`) reader
2//!
3//! This module implements efficient COG reading following the COG specification:
4//! - Reads only the IFD metadata on initialization (typically < 16KB)
5//! - Uses range requests for tile data (no full file downloads)
6//! - Caches decompressed tiles with LRU eviction
7//! - Supports multiple sources: local files, HTTP, S3
8//!
9//! Key optimizations:
10//! - Min/max from GDAL statistics tags (no full scan needed)
11//! - Data type detection from TIFF tags (no trial-and-error)
12//! - CRS detection from `GeoKey` directory
13//! - Single transform inversion per tile (not per pixel)
14
15use crate::range_reader::{create_range_reader, RangeReader};
16use crate::tiff_utils::AnyResult;
17use std::collections::HashMap;
18use std::sync::Arc;
19
20// TIFF tag constants
21const TAG_IMAGE_WIDTH: u16 = 256;
22const TAG_IMAGE_LENGTH: u16 = 257;
23const TAG_BITS_PER_SAMPLE: u16 = 258;
24const TAG_COMPRESSION: u16 = 259;
25const TAG_SAMPLES_PER_PIXEL: u16 = 277;
26const TAG_PREDICTOR: u16 = 317;
27const TAG_ROWS_PER_STRIP: u16 = 278;
28const TAG_STRIP_OFFSETS: u16 = 273;
29const TAG_STRIP_BYTE_COUNTS: u16 = 279;
30const TAG_TILE_WIDTH: u16 = 322;
31const TAG_TILE_LENGTH: u16 = 323;
32const TAG_TILE_OFFSETS: u16 = 324;
33const TAG_TILE_BYTE_COUNTS: u16 = 325;
34const TAG_SAMPLE_FORMAT: u16 = 339;
35const TAG_MODEL_PIXEL_SCALE: u16 = 33550;
36const TAG_MODEL_TIEPOINT: u16 = 33922;
37const TAG_GEO_KEY_DIRECTORY: u16 = 34735;
38const TAG_GDAL_METADATA: u16 = 42112;
39const TAG_GDAL_NODATA: u16 = 42113;
40
41// GeoKey constants
42const GEO_KEY_GEOGRAPHIC_TYPE: u16 = 2048;
43const GEO_KEY_PROJECTED_CRS: u16 = 3072;
44
45// Compression constants
46const COMPRESSION_NONE: u16 = 1;
47const COMPRESSION_LZW: u16 = 5;
48const COMPRESSION_DEFLATE: u16 = 8;
49const COMPRESSION_ZSTD: u16 = 50000;
50
51// Sample format constants
52const SAMPLE_FORMAT_UINT: u16 = 1;
53const SAMPLE_FORMAT_INT: u16 = 2;
54const SAMPLE_FORMAT_FLOAT: u16 = 3;
55
56/// Data type detected from TIFF tags
57#[derive(Debug, Clone, Copy, PartialEq, Eq)]
58pub enum CogDataType {
59    UInt8,
60    UInt16,
61    UInt32,
62    UInt64,
63    Int8,
64    Int16,
65    Int32,
66    Int64,
67    Float32,
68    Float64,
69}
70
71impl CogDataType {
72    #[must_use] pub fn bytes_per_sample(&self) -> usize {
73        match self {
74            CogDataType::UInt8 | CogDataType::Int8 => 1,
75            CogDataType::UInt16 | CogDataType::Int16 => 2,
76            CogDataType::UInt32 | CogDataType::Int32 | CogDataType::Float32 => 4,
77            CogDataType::UInt64 | CogDataType::Int64 | CogDataType::Float64 => 8,
78        }
79    }
80
81    /// Detect data type from TIFF tags
82    #[must_use] pub fn from_tags(bits_per_sample: u16, sample_format: u16) -> Option<Self> {
83        match (sample_format, bits_per_sample) {
84            (SAMPLE_FORMAT_UINT, 8) => Some(CogDataType::UInt8),
85            (SAMPLE_FORMAT_UINT, 16) => Some(CogDataType::UInt16),
86            (SAMPLE_FORMAT_UINT, 32) => Some(CogDataType::UInt32),
87            (SAMPLE_FORMAT_UINT, 64) => Some(CogDataType::UInt64),
88            (SAMPLE_FORMAT_INT, 8) => Some(CogDataType::Int8),
89            (SAMPLE_FORMAT_INT, 16) => Some(CogDataType::Int16),
90            (SAMPLE_FORMAT_INT, 32) => Some(CogDataType::Int32),
91            (SAMPLE_FORMAT_INT, 64) => Some(CogDataType::Int64),
92            (SAMPLE_FORMAT_FLOAT, 32) => Some(CogDataType::Float32),
93            (SAMPLE_FORMAT_FLOAT, 64) => Some(CogDataType::Float64),
94            // Default to unsigned if sample format not specified
95            (_, 8) => Some(CogDataType::UInt8),
96            (_, 16) => Some(CogDataType::UInt16),
97            (_, 32) => Some(CogDataType::UInt32),
98            _ => None,
99        }
100    }
101}
102
103/// Compression method
104#[derive(Debug, Clone, Copy, PartialEq, Eq)]
105pub enum Compression {
106    None,
107    Lzw,
108    Deflate,
109    Zstd,
110}
111
112impl Compression {
113    #[must_use] pub fn from_tag(value: u16) -> Option<Self> {
114        match value {
115            COMPRESSION_NONE => Some(Compression::None),
116            COMPRESSION_LZW => Some(Compression::Lzw),
117            COMPRESSION_DEFLATE | 32946 => Some(Compression::Deflate), // 32946 is old deflate
118            COMPRESSION_ZSTD => Some(Compression::Zstd),
119            _ => None,
120        }
121    }
122}
123
124/// `GeoTIFF` transform information
125#[derive(Debug, Clone)]
126pub struct GeoTransform {
127    /// Pixel scale (`x_scale`, `y_scale`, `z_scale`)
128    pub pixel_scale: Option<[f64; 3]>,
129    /// Tiepoint (i, j, k, x, y, z) - maps pixel (i,j,k) to world (x,y,z)
130    pub tiepoint: Option<[f64; 6]>,
131}
132
133impl GeoTransform {
134    /// Convert pixel coordinates to world coordinates
135    #[must_use] pub fn pixel_to_world(&self, px: f64, py: f64) -> Option<(f64, f64)> {
136        let scale = self.pixel_scale?;
137        let tie = self.tiepoint?;
138
139        let world_x = tie[3] + (px - tie[0]) * scale[0];
140        let world_y = tie[4] - (py - tie[1]) * scale[1]; // Y is typically inverted
141
142        Some((world_x, world_y))
143    }
144
145    /// Convert world coordinates to pixel coordinates
146    #[must_use] pub fn world_to_pixel(&self, wx: f64, wy: f64) -> Option<(f64, f64)> {
147        let scale = self.pixel_scale?;
148        let tie = self.tiepoint?;
149
150        if scale[0] == 0.0 || scale[1] == 0.0 {
151            return None;
152        }
153
154        let px = tie[0] + (wx - tie[3]) / scale[0];
155        let py = tie[1] + (tie[4] - wy) / scale[1]; // Y is typically inverted
156
157        Some((px, py))
158    }
159
160    /// Get the world extent of the image
161    #[must_use] pub fn get_extent(&self, width: usize, height: usize) -> Option<(f64, f64, f64, f64)> {
162        let (minx, maxy) = self.pixel_to_world(0.0, 0.0)?;
163        let (maxx, miny) = self.pixel_to_world(width as f64, height as f64)?;
164        Some((minx, miny, maxx, maxy))
165    }
166}
167
168/// COG metadata - read from IFD without loading tile data
169#[derive(Debug, Clone)]
170pub struct CogMetadata {
171    /// Image dimensions
172    pub width: usize,
173    pub height: usize,
174
175    /// Tile dimensions (COG requirement)
176    pub tile_width: usize,
177    pub tile_height: usize,
178
179    /// Number of bands/samples
180    pub bands: usize,
181
182    /// Data type
183    pub data_type: CogDataType,
184
185    /// Compression method
186    pub compression: Compression,
187
188    /// Predictor (1=none, 2=horizontal differencing, 3=floating point)
189    pub predictor: u16,
190
191    /// Byte order
192    pub little_endian: bool,
193
194    /// Tile byte offsets in the file
195    pub tile_offsets: Vec<u64>,
196
197    /// Tile byte counts (compressed sizes)
198    pub tile_byte_counts: Vec<u64>,
199
200    /// Number of tiles across
201    pub tiles_across: usize,
202
203    /// Number of tiles down
204    pub tiles_down: usize,
205
206    /// Whether this is a tiled TIFF (true) or stripped TIFF (false)
207    /// Tiled TIFFs are COG-optimized, stripped TIFFs are not
208    pub is_tiled: bool,
209
210    /// Geographic transform
211    pub geo_transform: GeoTransform,
212
213    /// Detected CRS (EPSG code)
214    pub crs_code: Option<i32>,
215
216    /// Min/max values from GDAL statistics (if present)
217    pub stats_min: Option<f32>,
218    pub stats_max: Option<f32>,
219
220    /// `NoData` value
221    pub nodata: Option<f64>,
222}
223
224impl CogMetadata {
225    /// Check if this appears to be a valid COG (has tiles)
226    #[must_use] pub fn is_tiled(&self) -> bool {
227        self.tile_width > 0 && self.tile_height > 0
228    }
229
230    /// Get tile index for a pixel coordinate
231    #[must_use] pub fn tile_index_for_pixel(&self, px: usize, py: usize) -> Option<usize> {
232        if px >= self.width || py >= self.height {
233            return None;
234        }
235        let tile_col = px / self.tile_width;
236        let tile_row = py / self.tile_height;
237        Some(tile_row * self.tiles_across + tile_col)
238    }
239
240    /// Get pixel range within a tile
241    #[must_use] pub fn pixel_range_in_tile(&self, tile_index: usize) -> (usize, usize, usize, usize) {
242        let tile_col = tile_index % self.tiles_across;
243        let tile_row = tile_index / self.tiles_across;
244
245        let start_x = tile_col * self.tile_width;
246        let start_y = tile_row * self.tile_height;
247        let end_x = (start_x + self.tile_width).min(self.width);
248        let end_y = (start_y + self.tile_height).min(self.height);
249
250        (start_x, start_y, end_x, end_y)
251    }
252
253    /// Get number of valid pixels in a tile (handles edge tiles)
254    #[must_use] pub fn tile_pixel_count(&self, tile_index: usize) -> usize {
255        let (start_x, start_y, end_x, end_y) = self.pixel_range_in_tile(tile_index);
256        (end_x - start_x) * (end_y - start_y) * self.bands
257    }
258}
259
260/// Overview metadata - subset of `CogMetadata` for overviews
261#[derive(Debug, Clone)]
262pub struct OverviewMetadata {
263    pub width: usize,
264    pub height: usize,
265    pub tile_width: usize,
266    pub tile_height: usize,
267    pub tiles_across: usize,
268    pub tiles_down: usize,
269    pub tile_offsets: Vec<u64>,
270    pub tile_byte_counts: Vec<u64>,
271    /// Scale factor relative to full resolution (2, 4, 8, etc.)
272    pub scale: usize,
273}
274
275impl OverviewMetadata {
276    /// Get tile index for a pixel coordinate at this overview level
277    #[must_use] pub fn tile_index_for_pixel(&self, px: usize, py: usize) -> Option<usize> {
278        if px >= self.width || py >= self.height {
279            return None;
280        }
281        let tile_col = px / self.tile_width;
282        let tile_row = py / self.tile_height;
283        Some(tile_row * self.tiles_across + tile_col)
284    }
285}
286
287/// COG Reader - efficient COG access with range requests
288pub struct CogReader {
289    reader: Arc<dyn RangeReader>,
290    pub metadata: CogMetadata,
291    /// Overview levels (sorted by scale factor, smallest to largest)
292    pub overviews: Vec<OverviewMetadata>,
293    /// Minimum usable overview index - overviews beyond this have insufficient data
294    /// None means all overviews are usable, Some(n) means only overviews 0..n are usable
295    pub min_usable_overview: Option<usize>,
296}
297
298impl CogReader {
299    /// Open a COG from any source (local file, HTTP URL, or S3)
300    pub fn open(source: &str) -> AnyResult<Self> {
301        let reader = create_range_reader(source)?;
302        Self::from_reader(reader)
303    }
304
305    /// Open from an existing range reader
306    pub fn from_reader(reader: Arc<dyn RangeReader>) -> AnyResult<Self> {
307        // Read header to get IFD offset and byte order
308        let header_bytes = reader.read_range(0, 8)?;
309
310        let little_endian = match &header_bytes[0..2] {
311            b"II" => true,
312            b"MM" => false,
313            _ => return Err("Invalid TIFF signature".into()),
314        };
315
316        let version = read_u16(&header_bytes[2..4], little_endian);
317        if version != 42 {
318            return Err(format!("Invalid TIFF version: {version}").into());
319        }
320
321        let ifd_offset = read_u32(&header_bytes[4..8], little_endian);
322        let file_size = reader.size();
323
324        // Read IFD entries - estimate size based on typical COG (usually < 4KB)
325        // Clamp to available bytes if IFD is near end of file
326        let ifd_size_estimate = 4096.min((file_size - u64::from(ifd_offset)) as usize);
327        let ifd_bytes = reader.read_range(u64::from(ifd_offset), ifd_size_estimate)?;
328
329        let (metadata, next_ifd_offset) = parse_ifd_with_next(&ifd_bytes, &reader, u64::from(ifd_offset), little_endian)?;
330
331        // Read overview IFDs (subsequent IFDs in the chain)
332        let mut overviews = Vec::new();
333        let mut current_ifd_offset = next_ifd_offset;
334        let full_width = metadata.width;
335
336        while current_ifd_offset != 0 {
337            let ovr_ifd_size = 4096.min((file_size - u64::from(current_ifd_offset)) as usize);
338            let ovr_ifd_bytes = reader.read_range(u64::from(current_ifd_offset), ovr_ifd_size)?;
339
340            if let Ok((ovr_meta, next_offset)) = parse_overview_ifd(&ovr_ifd_bytes, &reader, u64::from(current_ifd_offset), little_endian, &metadata) {
341                // Calculate actual scale from dimensions using floor division
342                // This matches GDAL's behavior: scale = full_width / ovr_width
343                // For 20966/1310 this gives 16, not 17 (ceiling would be wrong)
344                let actual_scale = full_width / ovr_meta.width;
345
346                overviews.push(OverviewMetadata {
347                    width: ovr_meta.width,
348                    height: ovr_meta.height,
349                    tile_width: ovr_meta.tile_width,
350                    tile_height: ovr_meta.tile_height,
351                    tiles_across: ovr_meta.tiles_across,
352                    tiles_down: ovr_meta.tiles_down,
353                    tile_offsets: ovr_meta.tile_offsets,
354                    tile_byte_counts: ovr_meta.tile_byte_counts,
355                    scale: actual_scale,
356                });
357
358                current_ifd_offset = next_offset;
359            } else {
360                break;
361            }
362
363            // Safety limit - COGs typically have at most 10 overviews
364            if overviews.len() > 10 {
365                break;
366            }
367        }
368
369        let mut cog_reader = Self {
370            reader,
371            metadata,
372            overviews,
373            min_usable_overview: None,
374        };
375
376        // Analyze overview quality to find minimum usable level
377        cog_reader.analyze_overview_quality();
378
379        Ok(cog_reader)
380    }
381
382    /// Analyze overview quality by sampling tiles to find valid data density
383    /// This determines which overviews have enough data to be useful
384    fn analyze_overview_quality(&mut self) {
385        if self.overviews.is_empty() {
386            return;
387        }
388
389        // For each overview (from smallest/coarsest to largest/finest), check if it has enough data
390        // We sample a few tiles from each overview and check data density
391        //
392        // Use 5% threshold - this is aggressive but ensures good visual results for sparse data.
393        // For a file with 6% valid data at full res, overviews with <5% are significantly degraded.
394        // The trade-off is that sparse datasets will read more tiles at low zoom, but the visual
395        // quality improvement is dramatic (see barley crop data as example).
396        let min_density_threshold = 0.05; // 5% - require good data density for visual quality
397
398        let mut last_good_overview: Option<usize> = None;
399        let mut overview_densities: Vec<(usize, f64)> = Vec::new();
400
401        // Iterate from smallest overview (highest index, coarsest) to largest (index 0, finest)
402        for (idx, ovr) in self.overviews.iter().enumerate().rev() {
403            // Sample up to 3 tiles from this overview
404            let num_tiles = ovr.tile_offsets.len();
405            let sample_indices: Vec<usize> = if num_tiles <= 3 {
406                (0..num_tiles).collect()
407            } else {
408                // Sample first, middle, and last tiles
409                vec![0, num_tiles / 2, num_tiles - 1]
410            };
411
412            let mut total_pixels = 0usize;
413            let mut valid_pixels = 0usize;
414
415            for &tile_idx in &sample_indices {
416                if let Ok(data) = self.read_overview_tile(idx, tile_idx) {
417                    total_pixels += data.len();
418                    valid_pixels += data.iter().filter(|v| !v.is_nan() && **v != 0.0).count();
419                }
420            }
421
422            let density = if total_pixels > 0 {
423                valid_pixels as f64 / total_pixels as f64
424            } else {
425                0.0
426            };
427
428            overview_densities.push((idx, density));
429
430            if density >= min_density_threshold {
431                last_good_overview = Some(idx);
432                break; // Found a good overview, don't need to check higher resolution ones
433            }
434        }
435
436        // If we found a good overview, set it as the minimum usable
437        // All overviews with index > last_good_overview are too sparse
438        self.min_usable_overview = last_good_overview;
439    }
440
441    /// Find the best overview level for a given source extent size
442    ///
443    /// Parameters:
444    /// - `extent_src_width`: How many source pixels the extent covers at full resolution
445    /// - `extent_src_height`: How many source pixels the extent covers at full resolution
446    /// - `output_width`: How many pixels we're actually rendering (e.g., 256)
447    /// - `output_height`: How many pixels we're actually rendering (e.g., 256)
448    ///
449    /// Returns None if full resolution should be used
450    #[must_use] pub fn best_overview_for_resolution(&self, extent_src_width: usize, extent_src_height: usize) -> Option<usize> {
451        // If min_usable_overview is None, ALL overviews are too sparse - always use full resolution
452        // This is critical for sparse datasets where even the largest overview has insufficient data
453        if self.min_usable_overview.is_none() && !self.overviews.is_empty() {
454            return None;
455        }
456
457        // Default output tile size
458        let output_size = 256.0;
459
460        // Calculate how many source pixels per output pixel we'd need at full res
461        // If extent covers 21600 source pixels but we only output 256 pixels, we can use an 84x overview
462        // If extent covers 256 source pixels for 256 output, we need full resolution (scale = 1)
463        let scale_x = extent_src_width as f64 / output_size;
464        let scale_y = extent_src_height as f64 / output_size;
465        let needed_scale = scale_x.max(scale_y);
466
467        // If we need close to full resolution (1:1 or less), don't use an overview
468        if needed_scale < 1.5 {
469            return None;
470        }
471
472        // Find the best overview that has enough resolution
473        // We want the overview with the largest scale that's still <= needed_scale
474        // (i.e., the smallest overview that still has enough detail)
475        let mut best_idx = None;
476        let mut best_scale = 0usize;
477
478        for (idx, ovr) in self.overviews.iter().enumerate() {
479            // Skip overviews that have been determined to have insufficient data
480            // min_usable_overview = Some(n) means only overviews 0..=n have enough data
481            if let Some(min_usable) = self.min_usable_overview
482                && idx > min_usable {
483                    // This overview is too sparse (beyond the minimum usable level)
484                    continue;
485                }
486
487            // This overview has 1/scale resolution compared to full
488            // We can use it if the overview has at least as many pixels as we need
489            // needed_scale = extent_pixels / output_pixels
490            // If needed_scale = 84 and overview scale = 64, overview has enough resolution
491            if ovr.scale <= (needed_scale as usize) && ovr.scale > best_scale {
492                best_scale = ovr.scale;
493                best_idx = Some(idx);
494            }
495        }
496
497        best_idx
498    }
499
500    /// Read a tile from a specific overview level
501    pub fn read_overview_tile(&self, overview_idx: usize, tile_index: usize) -> AnyResult<Vec<f32>> {
502        let ovr = self.overviews.get(overview_idx)
503            .ok_or_else(|| format!("Overview index {overview_idx} out of range"))?;
504
505        if tile_index >= ovr.tile_offsets.len() {
506            return Err(format!(
507                "Tile index {} out of range (max {})",
508                tile_index,
509                ovr.tile_offsets.len()
510            ).into());
511        }
512
513        let offset = ovr.tile_offsets[tile_index];
514        let byte_count = ovr.tile_byte_counts[tile_index] as usize;
515
516        if byte_count == 0 {
517            let pixel_count = ovr.tile_width * ovr.tile_height * self.metadata.bands;
518            return Ok(vec![f32::NAN; pixel_count]);
519        }
520
521        let compressed = self.reader.read_range(offset, byte_count)?;
522
523        let decompressed = decompress_tile(
524            &compressed,
525            self.metadata.compression,
526            ovr.tile_width,
527            ovr.tile_height,
528            self.metadata.bands,
529            self.metadata.data_type.bytes_per_sample(),
530        )?;
531
532        let unpredicted = apply_predictor(
533            &decompressed,
534            self.metadata.predictor,
535            ovr.tile_width,
536            self.metadata.bands,
537            self.metadata.data_type.bytes_per_sample(),
538        )?;
539
540        convert_to_f32(
541            &unpredicted,
542            self.metadata.data_type,
543            self.metadata.little_endian,
544        )
545    }
546
547    /// Read a single tile's raw data and decompress
548    pub fn read_tile(&self, tile_index: usize) -> AnyResult<Vec<f32>> {
549        if tile_index >= self.metadata.tile_offsets.len() {
550            return Err(format!(
551                "Tile index {} out of range (max {})",
552                tile_index,
553                self.metadata.tile_offsets.len()
554            )
555            .into());
556        }
557
558        let offset = self.metadata.tile_offsets[tile_index];
559        let byte_count = self.metadata.tile_byte_counts[tile_index] as usize;
560
561        if byte_count == 0 {
562            // Empty tile - return NaN-filled data
563            let pixel_count = self.metadata.tile_width * self.metadata.tile_height * self.metadata.bands;
564            return Ok(vec![f32::NAN; pixel_count]);
565        }
566
567        let compressed = self.reader.read_range(offset, byte_count)?;
568
569        // Decompress
570        let decompressed = decompress_tile(
571            &compressed,
572            self.metadata.compression,
573            self.metadata.tile_width,
574            self.metadata.tile_height,
575            self.metadata.bands,
576            self.metadata.data_type.bytes_per_sample(),
577        )?;
578
579        // Apply predictor if needed
580        let unpredicted = apply_predictor(
581            &decompressed,
582            self.metadata.predictor,
583            self.metadata.tile_width,
584            self.metadata.bands,
585            self.metadata.data_type.bytes_per_sample(),
586        )?;
587
588        // Convert to f32
589        convert_to_f32(
590            &unpredicted,
591            self.metadata.data_type,
592            self.metadata.little_endian,
593        )
594    }
595
596    /// Sample a single pixel value
597    pub fn sample(&self, band: usize, x: usize, y: usize) -> AnyResult<Option<f32>> {
598        let Some(tile_index) = self.metadata.tile_index_for_pixel(x, y) else {
599            return Ok(None);
600        };
601
602        let tile_data = self.read_tile(tile_index)?;
603
604        // Calculate position within tile
605        let tile_col = tile_index % self.metadata.tiles_across;
606        let tile_row = tile_index / self.metadata.tiles_across;
607        let local_x = x - tile_col * self.metadata.tile_width;
608        let local_y = y - tile_row * self.metadata.tile_height;
609
610        let idx = (local_y * self.metadata.tile_width + local_x) * self.metadata.bands + band;
611        Ok(tile_data.get(idx).copied())
612    }
613
614    /// Estimate min/max from sampling (when GDAL stats not available)
615    ///
616    /// For local files: Scans ALL tiles for accurate min/max values
617    /// For remote files (S3/HTTP): Samples a few tiles for efficiency
618    ///
619    /// Use `estimate_min_max_fast()` to always use fast sampling regardless of source.
620    pub fn estimate_min_max(&self) -> AnyResult<(f32, f32)> {
621        // First check for GDAL statistics
622        if let (Some(min), Some(max)) = (self.metadata.stats_min, self.metadata.stats_max) {
623            return Ok((min, max));
624        }
625
626        // For local files, do a full scan for accuracy
627        // For remote files, use fast sampling to minimize network requests
628        if self.reader.is_local() {
629            self.estimate_min_max_full_scan()
630        } else {
631            self.estimate_min_max_fast()
632        }
633    }
634
635    /// Fast min/max estimation - samples only corner and center tiles
636    /// Use this for remote files where full scans are expensive
637    pub fn estimate_min_max_fast(&self) -> AnyResult<(f32, f32)> {
638        // First check for GDAL statistics
639        if let (Some(min), Some(max)) = (self.metadata.stats_min, self.metadata.stats_max) {
640            return Ok((min, max));
641        }
642
643        // For files with overviews, sample from the smallest overview (most efficient)
644        if !self.overviews.is_empty() {
645            let smallest_ovr_idx = self.overviews.len() - 1;
646            let ovr = &self.overviews[smallest_ovr_idx];
647            let total_tiles = ovr.tile_offsets.len();
648
649            // Sample corner tiles + center tile from smallest overview
650            let sample_indices: Vec<usize> = if total_tiles <= 5 {
651                (0..total_tiles).collect()
652            } else {
653                vec![
654                    0,
655                    ovr.tiles_across.saturating_sub(1),
656                    total_tiles / 2,
657                    total_tiles.saturating_sub(ovr.tiles_across),
658                    total_tiles.saturating_sub(1),
659                ]
660            };
661
662            return self.scan_tiles_for_minmax(&sample_indices, Some(smallest_ovr_idx));
663        }
664
665        // No overviews - sample from full resolution tiles
666        let total_tiles = self.metadata.tile_offsets.len();
667        let sample_indices: Vec<usize> = if total_tiles <= 5 {
668            (0..total_tiles).collect()
669        } else {
670            vec![
671                0,                                          // Top-left
672                self.metadata.tiles_across.saturating_sub(1), // Top-right
673                total_tiles / 2,                            // Center
674                total_tiles.saturating_sub(self.metadata.tiles_across), // Bottom-left
675                total_tiles.saturating_sub(1),              // Bottom-right
676            ]
677        };
678
679        self.scan_tiles_for_minmax(&sample_indices, None)
680    }
681
682    /// Full scan min/max estimation - reads ALL tiles
683    /// Use this for local files where disk I/O is fast
684    fn estimate_min_max_full_scan(&self) -> AnyResult<(f32, f32)> {
685        // For files with overviews, scan the smallest overview (much faster)
686        if !self.overviews.is_empty() {
687            let smallest_ovr_idx = self.overviews.len() - 1;
688            let ovr = &self.overviews[smallest_ovr_idx];
689            let all_indices: Vec<usize> = (0..ovr.tile_offsets.len()).collect();
690            return self.scan_tiles_for_minmax(&all_indices, Some(smallest_ovr_idx));
691        }
692
693        // No overviews - must scan full resolution
694        let all_indices: Vec<usize> = (0..self.metadata.tile_offsets.len()).collect();
695        self.scan_tiles_for_minmax(&all_indices, None)
696    }
697
698    /// Helper to scan specific tiles for min/max values
699    fn scan_tiles_for_minmax(&self, indices: &[usize], overview_idx: Option<usize>) -> AnyResult<(f32, f32)> {
700        let mut min = f32::INFINITY;
701        let mut max = f32::NEG_INFINITY;
702        let nodata = self.metadata.nodata;
703
704        for &tile_idx in indices {
705            let tile_data = if let Some(ovr_idx) = overview_idx {
706                self.read_overview_tile(ovr_idx, tile_idx)?
707            } else {
708                self.read_tile(tile_idx)?
709            };
710
711            for &val in &tile_data {
712                // Skip NaN and nodata values
713                if val.is_nan() {
714                    continue;
715                }
716                if let Some(nd) = nodata
717                    && (f64::from(val) - nd).abs() < 0.001 {
718                        continue;
719                    }
720                if val < min {
721                    min = val;
722                }
723                if val > max {
724                    max = val;
725                }
726            }
727        }
728
729        if min.is_infinite() || max.is_infinite() {
730            Ok((0.0, 1.0)) // Fallback
731        } else {
732            Ok((min, max))
733        }
734    }
735}
736
737// ============================================================================
738// Helper functions for reading TIFF data
739// ============================================================================
740
741fn read_u16(bytes: &[u8], little_endian: bool) -> u16 {
742    if little_endian {
743        u16::from_le_bytes([bytes[0], bytes[1]])
744    } else {
745        u16::from_be_bytes([bytes[0], bytes[1]])
746    }
747}
748
749fn read_u32(bytes: &[u8], little_endian: bool) -> u32 {
750    if little_endian {
751        u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
752    } else {
753        u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
754    }
755}
756
757fn read_u64(bytes: &[u8], little_endian: bool) -> u64 {
758    if little_endian {
759        u64::from_le_bytes([
760            bytes[0], bytes[1], bytes[2], bytes[3],
761            bytes[4], bytes[5], bytes[6], bytes[7],
762        ])
763    } else {
764        u64::from_be_bytes([
765            bytes[0], bytes[1], bytes[2], bytes[3],
766            bytes[4], bytes[5], bytes[6], bytes[7],
767        ])
768    }
769}
770
771fn read_f64(bytes: &[u8], little_endian: bool) -> f64 {
772    if little_endian {
773        f64::from_le_bytes([
774            bytes[0], bytes[1], bytes[2], bytes[3],
775            bytes[4], bytes[5], bytes[6], bytes[7],
776        ])
777    } else {
778        f64::from_be_bytes([
779            bytes[0], bytes[1], bytes[2], bytes[3],
780            bytes[4], bytes[5], bytes[6], bytes[7],
781        ])
782    }
783}
784
785/// Parse IFD and extract all COG metadata
786fn parse_ifd(
787    ifd_bytes: &[u8],
788    reader: &Arc<dyn RangeReader>,
789    ifd_offset: u64,
790    little_endian: bool,
791) -> AnyResult<CogMetadata> {
792    let entry_count = read_u16(&ifd_bytes[0..2], little_endian) as usize;
793
794    // Parse all IFD entries into a map
795    let mut tags: HashMap<u16, IfdEntry> = HashMap::new();
796
797    for i in 0..entry_count {
798        let offset = 2 + i * 12;
799        if offset + 12 > ifd_bytes.len() {
800            break;
801        }
802
803        let tag = read_u16(&ifd_bytes[offset..offset + 2], little_endian);
804        let field_type = read_u16(&ifd_bytes[offset + 2..offset + 4], little_endian);
805        let count = read_u32(&ifd_bytes[offset + 4..offset + 8], little_endian);
806        let value_offset = read_u32(&ifd_bytes[offset + 8..offset + 12], little_endian);
807
808        tags.insert(
809            tag,
810            IfdEntry {
811                field_type,
812                count,
813                value_offset,
814                raw_bytes: [
815                    ifd_bytes[offset + 8],
816                    ifd_bytes[offset + 9],
817                    ifd_bytes[offset + 10],
818                    ifd_bytes[offset + 11],
819                ],
820            },
821        );
822    }
823
824    // Extract required tags
825    let width = get_tag_value(&tags, TAG_IMAGE_WIDTH, little_endian)
826        .ok_or("Missing ImageWidth tag")? as usize;
827    let height = get_tag_value(&tags, TAG_IMAGE_LENGTH, little_endian)
828        .ok_or("Missing ImageLength tag")? as usize;
829
830    let bits_per_sample = get_tag_value(&tags, TAG_BITS_PER_SAMPLE, little_endian).unwrap_or(8) as u16;
831    let sample_format = get_tag_value(&tags, TAG_SAMPLE_FORMAT, little_endian).unwrap_or(1) as u16;
832    let bands = get_tag_value(&tags, TAG_SAMPLES_PER_PIXEL, little_endian).unwrap_or(1) as usize;
833    let compression_val = get_tag_value(&tags, TAG_COMPRESSION, little_endian).unwrap_or(1) as u16;
834    let predictor = get_tag_value(&tags, TAG_PREDICTOR, little_endian).unwrap_or(1) as u16;
835
836    let data_type = CogDataType::from_tags(bits_per_sample, sample_format)
837        .ok_or_else(|| format!("Unsupported data type: bits={bits_per_sample}, format={sample_format}"))?;
838
839    let compression = Compression::from_tag(compression_val)
840        .ok_or_else(|| format!("Unsupported compression: {compression_val}"))?;
841
842    // Detect if tiled or stripped TIFF
843    let has_tile_tags = tags.contains_key(&TAG_TILE_OFFSETS);
844    let has_strip_tags = tags.contains_key(&TAG_STRIP_OFFSETS);
845    let is_tiled = has_tile_tags;
846
847    // For tiled: use tile dimensions; for stripped: tile_width = image width, tile_height = rows_per_strip
848    let (tile_width, tile_height, tiles_across, tiles_down, tile_offsets, tile_byte_counts) = if is_tiled {
849        // Tiled TIFF (COG-optimized)
850        let tw = get_tag_value(&tags, TAG_TILE_WIDTH, little_endian).unwrap_or(width as u32) as usize;
851        let th = get_tag_value(&tags, TAG_TILE_LENGTH, little_endian).unwrap_or(height as u32) as usize;
852        let ta = width.div_ceil(tw);
853        let td = height.div_ceil(th);
854        let total_tiles = ta * td;
855
856        let offsets = read_tag_array_u64(
857            &tags,
858            TAG_TILE_OFFSETS,
859            reader,
860            ifd_offset,
861            little_endian,
862            total_tiles,
863        )?;
864
865        let byte_counts = read_tag_array_u64(
866            &tags,
867            TAG_TILE_BYTE_COUNTS,
868            reader,
869            ifd_offset,
870            little_endian,
871            total_tiles,
872        )?;
873
874        (tw, th, ta, td, offsets, byte_counts)
875    } else if has_strip_tags {
876        // Stripped TIFF (not COG-optimized)
877        // Treat strips as "tiles" that span the full image width
878        let rows_per_strip = get_tag_value(&tags, TAG_ROWS_PER_STRIP, little_endian)
879            .unwrap_or(height as u32) as usize;
880        let tw = width; // Strip width = image width
881        let th = rows_per_strip;
882        let ta = 1; // Only 1 "tile" across (strips span full width)
883        let td = height.div_ceil(rows_per_strip);
884        let total_strips = td;
885
886        let offsets = read_tag_array_u64(
887            &tags,
888            TAG_STRIP_OFFSETS,
889            reader,
890            ifd_offset,
891            little_endian,
892            total_strips,
893        )?;
894
895        let byte_counts = read_tag_array_u64(
896            &tags,
897            TAG_STRIP_BYTE_COUNTS,
898            reader,
899            ifd_offset,
900            little_endian,
901            total_strips,
902        )?;
903
904        (tw, th, ta, td, offsets, byte_counts)
905    } else {
906        return Err("TIFF has neither tile nor strip tags".into());
907    };
908
909    // Read geo transform
910    let pixel_scale = read_tag_f64_array(&tags, TAG_MODEL_PIXEL_SCALE, reader, ifd_offset, little_endian, 3)?;
911    let tiepoint = read_tag_f64_array(&tags, TAG_MODEL_TIEPOINT, reader, ifd_offset, little_endian, 6)?;
912
913    let geo_transform = GeoTransform {
914        pixel_scale: pixel_scale.map(|v| [v[0], v[1], v[2]]),
915        tiepoint: tiepoint.map(|v| [v[0], v[1], v[2], v[3], v[4], v[5]]),
916    };
917
918    // Read CRS from GeoKey directory
919    let crs_code = read_crs_from_geokeys(&tags, reader, ifd_offset, little_endian)?;
920
921    // Read GDAL statistics
922    let (stats_min, stats_max) = read_gdal_stats(&tags, reader, ifd_offset, little_endian)?;
923
924    // Read nodata
925    let nodata = read_gdal_nodata(&tags, reader, ifd_offset, little_endian)?;
926
927    Ok(CogMetadata {
928        width,
929        height,
930        tile_width,
931        tile_height,
932        bands,
933        data_type,
934        compression,
935        predictor,
936        little_endian,
937        tile_offsets,
938        tile_byte_counts,
939        tiles_across,
940        tiles_down,
941        is_tiled,
942        geo_transform,
943        crs_code,
944        stats_min,
945        stats_max,
946        nodata,
947    })
948}
949
950/// Parse IFD and return metadata plus next IFD offset
951fn parse_ifd_with_next(
952    ifd_bytes: &[u8],
953    reader: &Arc<dyn RangeReader>,
954    ifd_offset: u64,
955    little_endian: bool,
956) -> AnyResult<(CogMetadata, u32)> {
957    let entry_count = read_u16(&ifd_bytes[0..2], little_endian) as usize;
958
959    // The next IFD offset is right after all entries
960    let next_ifd_pos = 2 + entry_count * 12;
961    let next_ifd_offset = if next_ifd_pos + 4 <= ifd_bytes.len() {
962        read_u32(&ifd_bytes[next_ifd_pos..next_ifd_pos + 4], little_endian)
963    } else {
964        0
965    };
966
967    let metadata = parse_ifd(ifd_bytes, reader, ifd_offset, little_endian)?;
968    Ok((metadata, next_ifd_offset))
969}
970
971/// Simplified metadata for overview IFDs
972struct OverviewIfdData {
973    width: usize,
974    height: usize,
975    tile_width: usize,
976    tile_height: usize,
977    tiles_across: usize,
978    tiles_down: usize,
979    tile_offsets: Vec<u64>,
980    tile_byte_counts: Vec<u64>,
981}
982
983/// Parse an overview IFD (simpler than full IFD parsing)
984fn parse_overview_ifd(
985    ifd_bytes: &[u8],
986    reader: &Arc<dyn RangeReader>,
987    ifd_offset: u64,
988    little_endian: bool,
989    _full_meta: &CogMetadata, // For inheriting compression, data type, etc.
990) -> AnyResult<(OverviewIfdData, u32)> {
991    let entry_count = read_u16(&ifd_bytes[0..2], little_endian) as usize;
992
993    // Parse all IFD entries into a map
994    let mut tags: HashMap<u16, IfdEntry> = HashMap::new();
995
996    for i in 0..entry_count {
997        let offset = 2 + i * 12;
998        if offset + 12 > ifd_bytes.len() {
999            break;
1000        }
1001
1002        let tag = read_u16(&ifd_bytes[offset..offset + 2], little_endian);
1003        let field_type = read_u16(&ifd_bytes[offset + 2..offset + 4], little_endian);
1004        let count = read_u32(&ifd_bytes[offset + 4..offset + 8], little_endian);
1005        let value_offset = read_u32(&ifd_bytes[offset + 8..offset + 12], little_endian);
1006
1007        tags.insert(
1008            tag,
1009            IfdEntry {
1010                field_type,
1011                count,
1012                value_offset,
1013                raw_bytes: [
1014                    ifd_bytes[offset + 8],
1015                    ifd_bytes[offset + 9],
1016                    ifd_bytes[offset + 10],
1017                    ifd_bytes[offset + 11],
1018                ],
1019            },
1020        );
1021    }
1022
1023    // Get next IFD offset
1024    let next_ifd_pos = 2 + entry_count * 12;
1025    let next_ifd_offset = if next_ifd_pos + 4 <= ifd_bytes.len() {
1026        read_u32(&ifd_bytes[next_ifd_pos..next_ifd_pos + 4], little_endian)
1027    } else {
1028        0
1029    };
1030
1031    // Extract dimensions and tile info
1032    let width = get_tag_value(&tags, TAG_IMAGE_WIDTH, little_endian)
1033        .ok_or("Overview missing ImageWidth tag")? as usize;
1034    let height = get_tag_value(&tags, TAG_IMAGE_LENGTH, little_endian)
1035        .ok_or("Overview missing ImageLength tag")? as usize;
1036
1037    let tile_width = get_tag_value(&tags, TAG_TILE_WIDTH, little_endian)
1038        .ok_or("Overview missing TileWidth tag")? as usize;
1039    let tile_height = get_tag_value(&tags, TAG_TILE_LENGTH, little_endian)
1040        .ok_or("Overview missing TileLength tag")? as usize;
1041
1042    let tiles_across = width.div_ceil(tile_width);
1043    let tiles_down = height.div_ceil(tile_height);
1044    let total_tiles = tiles_across * tiles_down;
1045
1046    // Read tile offsets and byte counts
1047    let tile_offsets = read_tag_array_u64(
1048        &tags,
1049        TAG_TILE_OFFSETS,
1050        reader,
1051        ifd_offset,
1052        little_endian,
1053        total_tiles,
1054    )?;
1055
1056    let tile_byte_counts = read_tag_array_u64(
1057        &tags,
1058        TAG_TILE_BYTE_COUNTS,
1059        reader,
1060        ifd_offset,
1061        little_endian,
1062        total_tiles,
1063    )?;
1064
1065    Ok((OverviewIfdData {
1066        width,
1067        height,
1068        tile_width,
1069        tile_height,
1070        tiles_across,
1071        tiles_down,
1072        tile_offsets,
1073        tile_byte_counts,
1074    }, next_ifd_offset))
1075}
1076
1077struct IfdEntry {
1078    field_type: u16,
1079    count: u32,
1080    value_offset: u32,
1081    raw_bytes: [u8; 4],
1082}
1083
1084fn get_tag_value(tags: &HashMap<u16, IfdEntry>, tag: u16, little_endian: bool) -> Option<u32> {
1085    let entry = tags.get(&tag)?;
1086    let type_size = match entry.field_type {
1087        1 => 1, // BYTE
1088        3 => 2, // SHORT
1089        4 => 4, // LONG
1090        _ => return None,
1091    };
1092
1093    if entry.count == 1 && type_size <= 4 {
1094        // Value is inline
1095        match entry.field_type {
1096            1 => Some(u32::from(entry.raw_bytes[0])),
1097            3 => Some(u32::from(read_u16(&entry.raw_bytes, little_endian))),
1098            4 => Some(read_u32(&entry.raw_bytes, little_endian)),
1099            _ => None,
1100        }
1101    } else {
1102        None // Would need to read from offset
1103    }
1104}
1105
1106fn read_tag_array_u64(
1107    tags: &HashMap<u16, IfdEntry>,
1108    tag: u16,
1109    reader: &Arc<dyn RangeReader>,
1110    _ifd_offset: u64,
1111    little_endian: bool,
1112    expected_count: usize,
1113) -> AnyResult<Vec<u64>> {
1114    let entry = tags.get(&tag).ok_or_else(|| format!("Missing tag {tag}"))?;
1115
1116    let type_size = match entry.field_type {
1117        3 => 2, // SHORT
1118        4 => 4, // LONG
1119        16 => 8, // LONG8
1120        _ => return Err(format!("Unsupported type {} for tag {}", entry.field_type, tag).into()),
1121    };
1122
1123    let total_bytes = entry.count as usize * type_size;
1124
1125    let raw_bytes = if total_bytes <= 4 {
1126        entry.raw_bytes[..total_bytes].to_vec()
1127    } else {
1128        reader.read_range(u64::from(entry.value_offset), total_bytes)?
1129    };
1130
1131    let mut values = Vec::with_capacity(entry.count as usize);
1132    for i in 0..entry.count as usize {
1133        let offset = i * type_size;
1134        let value = match entry.field_type {
1135            3 => u64::from(read_u16(&raw_bytes[offset..], little_endian)),
1136            4 => u64::from(read_u32(&raw_bytes[offset..], little_endian)),
1137            16 => read_u64(&raw_bytes[offset..], little_endian),
1138            _ => 0,
1139        };
1140        values.push(value);
1141    }
1142
1143    // Pad with zeros if we got fewer than expected
1144    while values.len() < expected_count {
1145        values.push(0);
1146    }
1147
1148    Ok(values)
1149}
1150
1151fn read_tag_f64_array(
1152    tags: &HashMap<u16, IfdEntry>,
1153    tag: u16,
1154    reader: &Arc<dyn RangeReader>,
1155    _ifd_offset: u64,
1156    little_endian: bool,
1157    min_count: usize,
1158) -> AnyResult<Option<Vec<f64>>> {
1159    let Some(entry) = tags.get(&tag) else {
1160        return Ok(None);
1161    };
1162
1163    if entry.field_type != 12 {
1164        // DOUBLE
1165        return Ok(None);
1166    }
1167
1168    if (entry.count as usize) < min_count {
1169        return Ok(None);
1170    }
1171
1172    let total_bytes = entry.count as usize * 8;
1173    let raw_bytes = reader.read_range(u64::from(entry.value_offset), total_bytes)?;
1174
1175    let mut values = Vec::with_capacity(entry.count as usize);
1176    for i in 0..entry.count as usize {
1177        let offset = i * 8;
1178        values.push(read_f64(&raw_bytes[offset..], little_endian));
1179    }
1180
1181    Ok(Some(values))
1182}
1183
1184fn read_crs_from_geokeys(
1185    tags: &HashMap<u16, IfdEntry>,
1186    reader: &Arc<dyn RangeReader>,
1187    _ifd_offset: u64,
1188    little_endian: bool,
1189) -> AnyResult<Option<i32>> {
1190    let Some(entry) = tags.get(&TAG_GEO_KEY_DIRECTORY) else {
1191        return Ok(None);
1192    };
1193
1194    // GeoKey directory is an array of SHORT values
1195    if entry.field_type != 3 {
1196        return Ok(None);
1197    }
1198
1199    let total_bytes = entry.count as usize * 2;
1200    let raw_bytes = if total_bytes <= 4 {
1201        entry.raw_bytes[..total_bytes].to_vec()
1202    } else {
1203        reader.read_range(u64::from(entry.value_offset), total_bytes)?
1204    };
1205
1206    // Parse GeoKey directory header
1207    // Format: KeyDirectoryVersion, KeyRevision, MinorRevision, NumberOfKeys
1208    //         KeyID, TIFFTagLocation, Count, Value_Offset
1209    //         ...repeated for each key
1210
1211    if raw_bytes.len() < 8 {
1212        return Ok(None);
1213    }
1214
1215    let num_keys = read_u16(&raw_bytes[6..8], little_endian) as usize;
1216
1217    for i in 0..num_keys {
1218        let offset = 8 + i * 8;
1219        if offset + 8 > raw_bytes.len() {
1220            break;
1221        }
1222
1223        let key_id = read_u16(&raw_bytes[offset..], little_endian);
1224        let _tiff_tag_location = read_u16(&raw_bytes[offset + 2..], little_endian);
1225        let _count = read_u16(&raw_bytes[offset + 4..], little_endian);
1226        let value = read_u16(&raw_bytes[offset + 6..], little_endian);
1227
1228        // Check for ProjectedCSTypeGeoKey (3072) or GeographicTypeGeoKey (2048)
1229        if key_id == GEO_KEY_PROJECTED_CRS && value > 0 {
1230            return Ok(Some(i32::from(value)));
1231        }
1232        if key_id == GEO_KEY_GEOGRAPHIC_TYPE && value > 0 {
1233            return Ok(Some(i32::from(value)));
1234        }
1235    }
1236
1237    Ok(None)
1238}
1239
1240fn read_gdal_stats(
1241    tags: &HashMap<u16, IfdEntry>,
1242    reader: &Arc<dyn RangeReader>,
1243    _ifd_offset: u64,
1244    _little_endian: bool,
1245) -> AnyResult<(Option<f32>, Option<f32>)> {
1246    let Some(entry) = tags.get(&TAG_GDAL_METADATA) else {
1247        return Ok((None, None));
1248    };
1249
1250    // GDAL metadata is ASCII/UTF-8 XML
1251    let total_bytes = entry.count as usize;
1252    let raw_bytes = if total_bytes <= 4 {
1253        entry.raw_bytes[..total_bytes].to_vec()
1254    } else {
1255        reader.read_range(u64::from(entry.value_offset), total_bytes)?
1256    };
1257
1258    let metadata_str = String::from_utf8_lossy(&raw_bytes);
1259
1260    // Parse STATISTICS_MINIMUM and STATISTICS_MAXIMUM from XML
1261    let min = extract_gdal_stat(&metadata_str, "STATISTICS_MINIMUM");
1262    let max = extract_gdal_stat(&metadata_str, "STATISTICS_MAXIMUM");
1263
1264    Ok((min, max))
1265}
1266
1267fn extract_gdal_stat(metadata: &str, key: &str) -> Option<f32> {
1268    let needle = format!("name=\"{key}\"");
1269    let pos = metadata.find(&needle)?;
1270    let rest = &metadata[pos..];
1271    let start = rest.find('>')? + 1;
1272    let rest = &rest[start..];
1273    let end = rest.find('<')?;
1274    rest[..end].trim().parse().ok()
1275}
1276
1277fn read_gdal_nodata(
1278    tags: &HashMap<u16, IfdEntry>,
1279    reader: &Arc<dyn RangeReader>,
1280    _ifd_offset: u64,
1281    _little_endian: bool,
1282) -> AnyResult<Option<f64>> {
1283    let entry = match tags.get(&TAG_GDAL_NODATA) {
1284        Some(e) => e,
1285        None => return Ok(None),
1286    };
1287
1288    let total_bytes = entry.count as usize;
1289    let raw_bytes = if total_bytes <= 4 {
1290        entry.raw_bytes[..total_bytes].to_vec()
1291    } else {
1292        reader.read_range(u64::from(entry.value_offset), total_bytes)?
1293    };
1294
1295    let nodata_str = String::from_utf8_lossy(&raw_bytes);
1296    let nodata_str = nodata_str.trim_end_matches('\0').trim();
1297
1298    Ok(nodata_str.parse().ok())
1299}
1300
1301// ============================================================================
1302// Decompression and data conversion
1303// ============================================================================
1304
1305fn decompress_tile(
1306    compressed: &[u8],
1307    compression: Compression,
1308    tile_width: usize,
1309    tile_height: usize,
1310    bands: usize,
1311    bytes_per_sample: usize,
1312) -> AnyResult<Vec<u8>> {
1313    let expected_size = tile_width * tile_height * bands * bytes_per_sample;
1314
1315    match compression {
1316        Compression::None => {
1317            if compressed.len() >= expected_size {
1318                Ok(compressed[..expected_size].to_vec())
1319            } else {
1320                // Pad with zeros
1321                let mut result = compressed.to_vec();
1322                result.resize(expected_size, 0);
1323                Ok(result)
1324            }
1325        }
1326        Compression::Deflate => {
1327            use std::io::Read;
1328            let mut decoder = flate2::read::ZlibDecoder::new(compressed);
1329            let mut decompressed = Vec::with_capacity(expected_size);
1330            decoder.read_to_end(&mut decompressed)?;
1331            Ok(decompressed)
1332        }
1333        Compression::Lzw => {
1334            // Use weezl for LZW decompression
1335            let mut decoder = weezl::decode::Decoder::with_tiff_size_switch(weezl::BitOrder::Msb, 8);
1336            let decompressed = decoder.decode(compressed)?;
1337            Ok(decompressed)
1338        }
1339        Compression::Zstd => {
1340            let decompressed = zstd::stream::decode_all(compressed)?;
1341            Ok(decompressed)
1342        }
1343    }
1344}
1345
1346fn apply_predictor(
1347    data: &[u8],
1348    predictor: u16,
1349    tile_width: usize,
1350    bands: usize,
1351    bytes_per_sample: usize,
1352) -> AnyResult<Vec<u8>> {
1353    match predictor {
1354        1 => Ok(data.to_vec()), // No predictor
1355        2 => {
1356            // Horizontal differencing
1357            let mut result = data.to_vec();
1358            let row_bytes = tile_width * bands * bytes_per_sample;
1359
1360            for row in result.chunks_mut(row_bytes) {
1361                // Skip first pixel in each row
1362                for i in bytes_per_sample..row.len() {
1363                    row[i] = row[i].wrapping_add(row[i - bytes_per_sample]);
1364                }
1365            }
1366
1367            Ok(result)
1368        }
1369        3 => {
1370            // Floating point predictor - operates on bytes, not values
1371            // Each byte position is differenced independently
1372            let mut result = data.to_vec();
1373            let row_bytes = tile_width * bands * bytes_per_sample;
1374
1375            for row in result.chunks_mut(row_bytes) {
1376                for byte_pos in 0..bytes_per_sample {
1377                    for i in 1..(row.len() / bytes_per_sample) {
1378                        let idx = i * bytes_per_sample + byte_pos;
1379                        let prev_idx = (i - 1) * bytes_per_sample + byte_pos;
1380                        row[idx] = row[idx].wrapping_add(row[prev_idx]);
1381                    }
1382                }
1383            }
1384
1385            Ok(result)
1386        }
1387        _ => Err(format!("Unsupported predictor: {predictor}").into()),
1388    }
1389}
1390
1391fn convert_to_f32(data: &[u8], data_type: CogDataType, little_endian: bool) -> AnyResult<Vec<f32>> {
1392    let bytes_per_sample = data_type.bytes_per_sample();
1393    let sample_count = data.len() / bytes_per_sample;
1394    let mut result = Vec::with_capacity(sample_count);
1395
1396    for i in 0..sample_count {
1397        let offset = i * bytes_per_sample;
1398        let bytes = &data[offset..offset + bytes_per_sample];
1399
1400        let value = match data_type {
1401            CogDataType::UInt8 => f32::from(bytes[0]),
1402            CogDataType::Int8 => f32::from(bytes[0] as i8),
1403            CogDataType::UInt16 => {
1404                if little_endian {
1405                    f32::from(u16::from_le_bytes([bytes[0], bytes[1]]))
1406                } else {
1407                    f32::from(u16::from_be_bytes([bytes[0], bytes[1]]))
1408                }
1409            }
1410            CogDataType::Int16 => {
1411                if little_endian {
1412                    f32::from(i16::from_le_bytes([bytes[0], bytes[1]]))
1413                } else {
1414                    f32::from(i16::from_be_bytes([bytes[0], bytes[1]]))
1415                }
1416            }
1417            CogDataType::UInt32 => {
1418                if little_endian {
1419                    u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as f32
1420                } else {
1421                    u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as f32
1422                }
1423            }
1424            CogDataType::Int32 => {
1425                if little_endian {
1426                    i32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as f32
1427                } else {
1428                    i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as f32
1429                }
1430            }
1431            CogDataType::Float32 => {
1432                if little_endian {
1433                    f32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
1434                } else {
1435                    f32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
1436                }
1437            }
1438            CogDataType::UInt64 => {
1439                if little_endian {
1440                    u64::from_le_bytes([
1441                        bytes[0], bytes[1], bytes[2], bytes[3],
1442                        bytes[4], bytes[5], bytes[6], bytes[7],
1443                    ]) as f32
1444                } else {
1445                    u64::from_be_bytes([
1446                        bytes[0], bytes[1], bytes[2], bytes[3],
1447                        bytes[4], bytes[5], bytes[6], bytes[7],
1448                    ]) as f32
1449                }
1450            }
1451            CogDataType::Int64 => {
1452                if little_endian {
1453                    i64::from_le_bytes([
1454                        bytes[0], bytes[1], bytes[2], bytes[3],
1455                        bytes[4], bytes[5], bytes[6], bytes[7],
1456                    ]) as f32
1457                } else {
1458                    i64::from_be_bytes([
1459                        bytes[0], bytes[1], bytes[2], bytes[3],
1460                        bytes[4], bytes[5], bytes[6], bytes[7],
1461                    ]) as f32
1462                }
1463            }
1464            CogDataType::Float64 => {
1465                if little_endian {
1466                    f64::from_le_bytes([
1467                        bytes[0], bytes[1], bytes[2], bytes[3],
1468                        bytes[4], bytes[5], bytes[6], bytes[7],
1469                    ]) as f32
1470                } else {
1471                    f64::from_be_bytes([
1472                        bytes[0], bytes[1], bytes[2], bytes[3],
1473                        bytes[4], bytes[5], bytes[6], bytes[7],
1474                    ]) as f32
1475                }
1476            }
1477        };
1478
1479        result.push(value);
1480    }
1481
1482    Ok(result)
1483}
1484
1485#[cfg(test)]
1486mod tests {
1487    use super::*;
1488
1489    #[test]
1490    fn test_data_type_detection() {
1491        assert_eq!(CogDataType::from_tags(8, 1), Some(CogDataType::UInt8));
1492        assert_eq!(CogDataType::from_tags(16, 1), Some(CogDataType::UInt16));
1493        assert_eq!(CogDataType::from_tags(32, 3), Some(CogDataType::Float32));
1494        assert_eq!(CogDataType::from_tags(64, 3), Some(CogDataType::Float64));
1495    }
1496
1497    #[test]
1498    fn test_compression_detection() {
1499        assert_eq!(Compression::from_tag(1), Some(Compression::None));
1500        assert_eq!(Compression::from_tag(5), Some(Compression::Lzw));
1501        assert_eq!(Compression::from_tag(8), Some(Compression::Deflate));
1502    }
1503
1504    #[test]
1505    fn test_geo_transform() {
1506        let transform = GeoTransform {
1507            pixel_scale: Some([10.0, 10.0, 0.0]),
1508            tiepoint: Some([0.0, 0.0, 0.0, 100.0, 200.0, 0.0]),
1509        };
1510
1511        // Pixel (0,0) should map to (100, 200)
1512        let (wx, wy) = transform.pixel_to_world(0.0, 0.0).unwrap();
1513        assert!((wx - 100.0).abs() < 0.001);
1514        assert!((wy - 200.0).abs() < 0.001);
1515
1516        // Pixel (10, 5) should map to (200, 150)
1517        let (wx, wy) = transform.pixel_to_world(10.0, 5.0).unwrap();
1518        assert!((wx - 200.0).abs() < 0.001);
1519        assert!((wy - 150.0).abs() < 0.001);
1520    }
1521
1522    #[test]
1523    fn test_real_cog_file() {
1524        // Test with a real COG file if it exists
1525        let path = "data/viridis/output_cog.tif";
1526        if !std::path::Path::new(path).exists() {
1527            println!("Skipping test - file not found: {}", path);
1528            return;
1529        }
1530
1531        let reader = CogReader::open(path).expect("Failed to open COG");
1532        let m = &reader.metadata;
1533
1534        println!("Testing real COG: {}", path);
1535        println!("  Width: {}, Height: {}", m.width, m.height);
1536        println!("  Tile size: {}x{}", m.tile_width, m.tile_height);
1537        println!("  CRS code: {:?}", m.crs_code);
1538        println!("  Bands: {}", m.bands);
1539        println!("  Compression: {:?}", m.compression);
1540        println!("  Extent: {:?}", m.geo_transform.get_extent(m.width, m.height));
1541        println!("  Pixel scale: {:?}", m.geo_transform.pixel_scale);
1542        println!("  Tiepoint: {:?}", m.geo_transform.tiepoint);
1543
1544        // Verify basic metadata
1545        assert!(m.width > 0, "Width should be positive");
1546        assert!(m.height > 0, "Height should be positive");
1547        assert!(m.is_tiled(), "Should be a tiled TIFF");
1548
1549        // Test world_to_pixel for known coordinates
1550        // Center of the image should be at pixel (width/2, height/2)
1551        if let Some((px, py)) = m.geo_transform.world_to_pixel(0.0, 0.0) {
1552            println!("  Lon 0, Lat 0 -> pixel ({}, {})", px, py);
1553            // For a global dataset, (0,0) should be near center
1554            assert!(px > 0.0 && px < m.width as f64, "X pixel should be in range");
1555            assert!(py > 0.0 && py < m.height as f64, "Y pixel should be in range");
1556        }
1557
1558        // Try to read tile 0
1559        let tile_data = reader.read_tile(0).expect("Failed to read tile 0");
1560        assert!(!tile_data.is_empty(), "Tile data should not be empty");
1561
1562        let non_nan = tile_data.iter().filter(|v| !v.is_nan()).count();
1563        println!("  Tile 0: {} values, {} non-NaN", tile_data.len(), non_nan);
1564        assert!(non_nan > 0, "Tile should have some valid pixels");
1565
1566        // Test min/max estimation
1567        let (min, max) = reader.estimate_min_max().expect("Failed to estimate min/max");
1568        println!("  Estimated min: {}, max: {}", min, max);
1569        // For an RGB image, values should be 0-255
1570        assert!(min >= 0.0, "Min should be >= 0");
1571        assert!(max <= 255.0, "Max should be <= 255 for 8-bit data");
1572    }
1573}
1574
1575// ============================================================
1576// COMPREHENSIVE TESTS FOR COG READER
1577// These tests verify:
1578// 1. Scale calculation uses FLOOR division (matching GDAL)
1579// 2. CRS detection works correctly
1580// 3. Pixel values match GDAL output
1581// ============================================================
1582
1583#[test]
1584fn test_gray_3857_crs_detection() {
1585    let path = "data/grayscale/gray_3857-cog.tif";
1586    if !std::path::Path::new(path).exists() {
1587        println!("Skipping - file not found: {}", path);
1588        return;
1589    }
1590
1591    let reader = CogReader::open(path).expect("Failed to open COG");
1592
1593    // EPSG:3857 should be detected
1594    assert_eq!(reader.metadata.crs_code, Some(3857), "CRS should be detected as 3857");
1595}
1596
1597/// TEST: Overview scale calculation uses FLOOR division
1598///
1599/// This test catches the bug where we used ceiling division instead of floor.
1600/// For gray_3857-cog.tif (20966x20966), overview 3 (1310x1310):
1601/// - WRONG (ceiling): (20966 + 1310 - 1) / 1310 = 17
1602/// - CORRECT (floor): 20966 / 1310 = 16
1603///
1604/// The scale affects coordinate calculations, causing ~6% pixel position errors.
1605#[test]
1606fn test_overview_scale_uses_floor_division() {
1607    let path = "data/grayscale/gray_3857-cog.tif";
1608    if !std::path::Path::new(path).exists() {
1609        println!("Skipping - file not found: {}", path);
1610        return;
1611    }
1612
1613    let reader = CogReader::open(path).expect("Failed to open COG");
1614
1615    // Verify we have overviews
1616    assert!(!reader.overviews.is_empty(), "Should have overviews");
1617
1618    // Check that scale calculation matches GDAL behavior (floor division)
1619    let full_width = reader.metadata.width;
1620
1621    for (i, ovr) in reader.overviews.iter().enumerate() {
1622        // Calculate expected scale using floor division (GDAL's method)
1623        let expected_scale = full_width / ovr.width;
1624
1625        // Verify our scale matches
1626        assert_eq!(
1627            ovr.scale, expected_scale,
1628            "Overview {} scale mismatch: got {}, expected {} (floor of {}/{})",
1629            i, ovr.scale, expected_scale, full_width, ovr.width
1630        );
1631
1632        // Also verify it's NOT using ceiling division
1633        let ceiling_scale = full_width.div_ceil(ovr.width);
1634        if ceiling_scale != expected_scale {
1635            // If ceiling would give different result, make sure we're using floor
1636            assert_ne!(
1637                ovr.scale, ceiling_scale,
1638                "Overview {} appears to use ceiling division (got {}), should use floor ({})",
1639                i, ceiling_scale, expected_scale
1640            );
1641        }
1642    }
1643
1644    // Specific check for overview 3 which caused the original bug
1645    if reader.overviews.len() > 3 {
1646        let ovr3 = &reader.overviews[3];
1647        assert_eq!(
1648            ovr3.scale, 16,
1649            "Overview 3 (1310x1310) scale should be 16 (floor), not 17 (ceiling)"
1650        );
1651    }
1652}
1653
1654/// TEST: Pixel values at known positions match GDAL output
1655///
1656/// This test verifies that the tile extraction produces correct pixel values.
1657/// Values were obtained from GDAL: gdal_translate with -projwin
1658#[test]
1659fn test_overview_pixel_values_match_gdal() {
1660    let path = "data/grayscale/gray_3857-cog.tif";
1661    if !std::path::Path::new(path).exists() {
1662        println!("Skipping - file not found: {}", path);
1663        return;
1664    }
1665
1666    let reader = CogReader::open(path).expect("Failed to open COG");
1667
1668    // Test reading from overview 3 (1310x1310, scale 16)
1669    if reader.overviews.len() > 3 {
1670        let ovr_idx = 3;
1671        let ovr = &reader.overviews[ovr_idx];
1672
1673        // Verify overview properties
1674        assert_eq!(ovr.width, 1310, "Overview 3 should be 1310 wide");
1675        assert_eq!(ovr.height, 1310, "Overview 3 should be 1310 tall");
1676        assert_eq!(ovr.scale, 16, "Overview 3 scale should be 16");
1677
1678        // Read tile 0 from overview
1679        let tile_data = reader.read_overview_tile(ovr_idx, 0).expect("Failed to read overview tile 0");
1680
1681        // Verify tile data size
1682        let expected_size = ovr.tile_width * ovr.tile_height;
1683        assert_eq!(tile_data.len(), expected_size, "Tile data should be {}x{} pixels", ovr.tile_width, ovr.tile_height);
1684
1685        // Check that we have valid (non-NaN) data
1686        let valid_count = tile_data.iter().filter(|v| !v.is_nan()).count();
1687        assert!(valid_count > 0, "Tile should have valid (non-NaN) pixels");
1688
1689        // Verify pixel values are in expected range for grayscale (0-255)
1690        let min_val = tile_data.iter().filter(|v| !v.is_nan()).copied().fold(f32::INFINITY, f32::min);
1691        let max_val = tile_data.iter().filter(|v| !v.is_nan()).copied().fold(f32::NEG_INFINITY, f32::max);
1692
1693        assert!(min_val >= 0.0, "Min value should be >= 0, got {}", min_val);
1694        assert!(max_val <= 255.0, "Max value should be <= 255, got {}", max_val);
1695
1696        // Check specific pixel value that GDAL reports
1697        // At tile position (0, 0) in overview 3, GDAL shows value ~176
1698        let corner_value = tile_data[0];
1699        assert!(
1700            !corner_value.is_nan() && (100.0..=255.0).contains(&corner_value),
1701            "Corner value should be valid grayscale, got {}",
1702            corner_value
1703        );
1704    }
1705}
1706
1707/// TEST: Scale factor correctly affects coordinate mapping
1708///
1709/// This test verifies that the scale factor properly adjusts the pixel_scale
1710/// when using overviews, which is critical for correct tile generation.
1711#[test]
1712fn test_scale_factor_coordinate_mapping() {
1713    let path = "data/grayscale/gray_3857-cog.tif";
1714    if !std::path::Path::new(path).exists() {
1715        println!("Skipping - file not found: {}", path);
1716        return;
1717    }
1718
1719    let reader = CogReader::open(path).expect("Failed to open COG");
1720
1721    if let (Some(pixel_scale), Some(_tiepoint)) = (
1722        reader.metadata.geo_transform.pixel_scale,
1723        reader.metadata.geo_transform.tiepoint,
1724    ) {
1725        let base_scale_x = pixel_scale[0];
1726
1727        for (i, ovr) in reader.overviews.iter().enumerate() {
1728            // Calculate effective scale for this overview
1729            let effective_scale_x = base_scale_x * (ovr.scale as f64);
1730
1731            // The effective scale should roughly equal full_extent / overview_width
1732            // For a COG covering ~20 million meters in 1310 pixels at overview 3:
1733            // effective_scale ≈ 20e6 / 1310 ≈ 15267 meters/pixel
1734            let full_extent_x = base_scale_x * (reader.metadata.width as f64);
1735            let expected_effective_scale = full_extent_x / (ovr.width as f64);
1736
1737            // Allow 1% tolerance for rounding
1738            let tolerance = expected_effective_scale * 0.01;
1739            assert!(
1740                (effective_scale_x - expected_effective_scale).abs() < tolerance,
1741                "Overview {} effective scale mismatch: got {}, expected {} (within {})",
1742                i, effective_scale_x, expected_effective_scale, tolerance
1743            );
1744        }
1745    }
1746}
1747
1748/// TEST: Best overview selection returns appropriate level
1749#[test]
1750fn test_best_overview_selection() {
1751    let path = "data/grayscale/gray_3857-cog.tif";
1752    if !std::path::Path::new(path).exists() {
1753        println!("Skipping - file not found: {}", path);
1754        return;
1755    }
1756
1757    let reader = CogReader::open(path).expect("Failed to open COG");
1758
1759    // For a small extent (256 pixels worth), should return None (use full res)
1760    let _full_res = reader.best_overview_for_resolution(256, 256);
1761    // This might return None or a small overview index depending on the image
1762
1763    // For a large extent (whole image), should return highest overview
1764    let large_extent = reader.best_overview_for_resolution(20000, 20000);
1765    assert!(
1766        large_extent.is_some() || reader.overviews.is_empty(),
1767        "Large extent should use an overview"
1768    );
1769
1770    // For medium extent, should return appropriate overview
1771    let medium_extent = reader.best_overview_for_resolution(5000, 5000);
1772    // Just verify it doesn't panic and returns a valid index
1773    if let Some(idx) = medium_extent {
1774        assert!(
1775            idx < reader.overviews.len(),
1776            "Overview index {} should be valid",
1777            idx
1778        );
1779    }
1780}