wsi_streamer/format/tiff/
values.rs

1//! TIFF tag value reading.
2//!
3//! This module provides functionality to read tag values from TIFF files.
4//! Values can be stored either inline in the IFD entry (for small values)
5//! or at an offset in the file (for larger values like arrays).
6//!
7//! # Performance Considerations
8//!
9//! For array values (like TileOffsets and TileByteCounts), this module
10//! fetches the entire array in a single range request. This is critical
11//! for performance when working with remote storage.
12
13use bytes::Bytes;
14
15use crate::error::TiffError;
16use crate::io::RangeReader;
17
18use super::parser::{ByteOrder, IfdEntry, TiffHeader};
19use super::tags::FieldType;
20
21// =============================================================================
22// ValueReader
23// =============================================================================
24
25/// Reads tag values from a TIFF file.
26///
27/// This struct combines a RangeReader with TIFF header information to
28/// read values respecting the file's byte order and format.
29pub struct ValueReader<'a, R: RangeReader> {
30    reader: &'a R,
31    header: &'a TiffHeader,
32}
33
34impl<'a, R: RangeReader> ValueReader<'a, R> {
35    /// Create a new ValueReader.
36    pub fn new(reader: &'a R, header: &'a TiffHeader) -> Self {
37        Self { reader, header }
38    }
39
40    /// Get the byte order from the header.
41    #[inline]
42    pub fn byte_order(&self) -> ByteOrder {
43        self.header.byte_order
44    }
45
46    /// Read raw bytes for an IFD entry's value.
47    ///
48    /// For inline values, returns the bytes from the entry.
49    /// For offset values, fetches the bytes from the file.
50    pub async fn read_bytes(&self, entry: &IfdEntry) -> Result<Bytes, TiffError> {
51        let size = entry
52            .value_byte_size()
53            .ok_or(TiffError::UnknownFieldType(entry.field_type_raw))?;
54
55        if entry.is_inline {
56            // Value is stored inline - extract from entry bytes
57            Ok(Bytes::copy_from_slice(
58                &entry.value_offset_bytes[..size as usize],
59            ))
60        } else {
61            // Value is at an offset - fetch from file
62            let offset = entry.value_offset(self.header.byte_order);
63            let bytes = self.reader.read_exact_at(offset, size as usize).await?;
64            Ok(bytes)
65        }
66    }
67
68    /// Read a single u32 value from an entry.
69    ///
70    /// Handles both Short and Long field types, converting as needed.
71    pub async fn read_u32(&self, entry: &IfdEntry) -> Result<u32, TiffError> {
72        // Try inline first
73        if let Some(value) = entry.inline_u32(self.header.byte_order) {
74            return Ok(value);
75        }
76
77        // Must fetch from offset
78        let field_type = entry
79            .field_type
80            .ok_or(TiffError::UnknownFieldType(entry.field_type_raw))?;
81
82        if entry.count != 1 {
83            return Err(TiffError::InvalidTagValue {
84                tag: "unknown",
85                message: format!("expected count 1, got {}", entry.count),
86            });
87        }
88
89        let bytes = self.read_bytes(entry).await?;
90        let byte_order = self.header.byte_order;
91
92        match field_type {
93            FieldType::Short => Ok(byte_order.read_u16(&bytes) as u32),
94            FieldType::Long => Ok(byte_order.read_u32(&bytes)),
95            _ => Err(TiffError::InvalidTagValue {
96                tag: "unknown",
97                message: format!("expected Short or Long, got {:?}", field_type),
98            }),
99        }
100    }
101
102    /// Read a single u64 value from an entry.
103    ///
104    /// Handles Short, Long, and Long8 field types, converting as needed.
105    pub async fn read_u64(&self, entry: &IfdEntry) -> Result<u64, TiffError> {
106        // Try inline first
107        if let Some(value) = entry.inline_u64(self.header.byte_order) {
108            return Ok(value);
109        }
110
111        // Must fetch from offset
112        let field_type = entry
113            .field_type
114            .ok_or(TiffError::UnknownFieldType(entry.field_type_raw))?;
115
116        if entry.count != 1 {
117            return Err(TiffError::InvalidTagValue {
118                tag: "unknown",
119                message: format!("expected count 1, got {}", entry.count),
120            });
121        }
122
123        let bytes = self.read_bytes(entry).await?;
124        let byte_order = self.header.byte_order;
125
126        match field_type {
127            FieldType::Short => Ok(byte_order.read_u16(&bytes) as u64),
128            FieldType::Long => Ok(byte_order.read_u32(&bytes) as u64),
129            FieldType::Long8 => Ok(byte_order.read_u64(&bytes)),
130            _ => Err(TiffError::InvalidTagValue {
131                tag: "unknown",
132                message: format!("expected Short, Long, or Long8, got {:?}", field_type),
133            }),
134        }
135    }
136
137    /// Read an array of u64 values from an entry.
138    ///
139    /// This is the primary method for reading TileOffsets and TileByteCounts.
140    /// The entire array is fetched in a single range request for efficiency.
141    ///
142    /// Handles Short, Long, and Long8 field types, converting all to u64.
143    pub async fn read_u64_array(&self, entry: &IfdEntry) -> Result<Vec<u64>, TiffError> {
144        let field_type = entry
145            .field_type
146            .ok_or(TiffError::UnknownFieldType(entry.field_type_raw))?;
147
148        let count = entry.count as usize;
149        if count == 0 {
150            return Ok(Vec::new());
151        }
152
153        let bytes = self.read_bytes(entry).await?;
154        let byte_order = self.header.byte_order;
155
156        let mut values = Vec::with_capacity(count);
157
158        match field_type {
159            FieldType::Short => {
160                for i in 0..count {
161                    let offset = i * 2;
162                    values.push(byte_order.read_u16(&bytes[offset..]) as u64);
163                }
164            }
165            FieldType::Long => {
166                for i in 0..count {
167                    let offset = i * 4;
168                    values.push(byte_order.read_u32(&bytes[offset..]) as u64);
169                }
170            }
171            FieldType::Long8 => {
172                for i in 0..count {
173                    let offset = i * 8;
174                    values.push(byte_order.read_u64(&bytes[offset..]));
175                }
176            }
177            _ => {
178                return Err(TiffError::InvalidTagValue {
179                    tag: "unknown",
180                    message: format!(
181                        "expected Short, Long, or Long8 for array, got {:?}",
182                        field_type
183                    ),
184                });
185            }
186        }
187
188        Ok(values)
189    }
190
191    /// Read an array of u32 values from an entry.
192    ///
193    /// Similar to read_u64_array but returns u32 values.
194    /// Useful for tile dimensions and other 32-bit array values.
195    pub async fn read_u32_array(&self, entry: &IfdEntry) -> Result<Vec<u32>, TiffError> {
196        let field_type = entry
197            .field_type
198            .ok_or(TiffError::UnknownFieldType(entry.field_type_raw))?;
199
200        let count = entry.count as usize;
201        if count == 0 {
202            return Ok(Vec::new());
203        }
204
205        let bytes = self.read_bytes(entry).await?;
206        let byte_order = self.header.byte_order;
207
208        let mut values = Vec::with_capacity(count);
209
210        match field_type {
211            FieldType::Short => {
212                for i in 0..count {
213                    let offset = i * 2;
214                    values.push(byte_order.read_u16(&bytes[offset..]) as u32);
215                }
216            }
217            FieldType::Long => {
218                for i in 0..count {
219                    let offset = i * 4;
220                    values.push(byte_order.read_u32(&bytes[offset..]));
221                }
222            }
223            _ => {
224                return Err(TiffError::InvalidTagValue {
225                    tag: "unknown",
226                    message: format!("expected Short or Long for u32 array, got {:?}", field_type),
227                });
228            }
229        }
230
231        Ok(values)
232    }
233
234    /// Read a string value from an entry (ASCII type).
235    ///
236    /// The string is expected to be null-terminated. The null terminator
237    /// is stripped from the result.
238    pub async fn read_string(&self, entry: &IfdEntry) -> Result<String, TiffError> {
239        let field_type = entry
240            .field_type
241            .ok_or(TiffError::UnknownFieldType(entry.field_type_raw))?;
242
243        if field_type != FieldType::Ascii {
244            return Err(TiffError::InvalidTagValue {
245                tag: "unknown",
246                message: format!("expected Ascii type for string, got {:?}", field_type),
247            });
248        }
249
250        let bytes = self.read_bytes(entry).await?;
251
252        // Find null terminator and convert to string
253        let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
254        let s = String::from_utf8_lossy(&bytes[..end]).into_owned();
255
256        Ok(s)
257    }
258
259    /// Read raw bytes from an entry (for UNDEFINED or opaque data).
260    ///
261    /// This is used for JPEGTables and other binary data.
262    pub async fn read_raw_bytes(&self, entry: &IfdEntry) -> Result<Bytes, TiffError> {
263        self.read_bytes(entry).await
264    }
265}
266
267// =============================================================================
268// Convenience functions for reading from bytes directly
269// =============================================================================
270
271/// Parse an array of u64 values from raw bytes.
272///
273/// This is useful when you already have the bytes and just need to parse them.
274pub fn parse_u64_array(
275    bytes: &[u8],
276    count: usize,
277    field_type: FieldType,
278    byte_order: ByteOrder,
279) -> Vec<u64> {
280    let mut values = Vec::with_capacity(count);
281
282    match field_type {
283        FieldType::Short => {
284            for i in 0..count {
285                let offset = i * 2;
286                if offset + 2 <= bytes.len() {
287                    values.push(byte_order.read_u16(&bytes[offset..]) as u64);
288                }
289            }
290        }
291        FieldType::Long => {
292            for i in 0..count {
293                let offset = i * 4;
294                if offset + 4 <= bytes.len() {
295                    values.push(byte_order.read_u32(&bytes[offset..]) as u64);
296                }
297            }
298        }
299        FieldType::Long8 => {
300            for i in 0..count {
301                let offset = i * 8;
302                if offset + 8 <= bytes.len() {
303                    values.push(byte_order.read_u64(&bytes[offset..]));
304                }
305            }
306        }
307        _ => {}
308    }
309
310    values
311}
312
313/// Parse an array of u32 values from raw bytes.
314pub fn parse_u32_array(
315    bytes: &[u8],
316    count: usize,
317    field_type: FieldType,
318    byte_order: ByteOrder,
319) -> Vec<u32> {
320    let mut values = Vec::with_capacity(count);
321
322    match field_type {
323        FieldType::Short => {
324            for i in 0..count {
325                let offset = i * 2;
326                if offset + 2 <= bytes.len() {
327                    values.push(byte_order.read_u16(&bytes[offset..]) as u32);
328                }
329            }
330        }
331        FieldType::Long => {
332            for i in 0..count {
333                let offset = i * 4;
334                if offset + 4 <= bytes.len() {
335                    values.push(byte_order.read_u32(&bytes[offset..]));
336                }
337            }
338        }
339        _ => {}
340    }
341
342    values
343}
344
345// =============================================================================
346// Tests
347// =============================================================================
348
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::IoError;
    use async_trait::async_trait;

    /// In-memory `RangeReader` that serves ranges out of a byte vector.
    struct MockReader {
        data: Vec<u8>,
    }

    impl MockReader {
        fn new(data: Vec<u8>) -> Self {
            MockReader { data }
        }
    }

    #[async_trait]
    impl RangeReader for MockReader {
        async fn read_exact_at(&self, offset: u64, len: usize) -> Result<Bytes, IoError> {
            let start = offset as usize;
            let end = start + len;
            // `get` returns None exactly when the range runs past the data.
            match self.data.get(start..end) {
                Some(window) => Ok(Bytes::copy_from_slice(window)),
                None => Err(IoError::RangeOutOfBounds {
                    offset,
                    requested: len as u64,
                    size: self.data.len() as u64,
                }),
            }
        }

        fn size(&self) -> u64 {
            self.data.len() as u64
        }

        fn identifier(&self) -> &str {
            "mock://test"
        }
    }

    /// Header for a classic (non-BigTIFF) little-endian file.
    fn make_tiff_header() -> TiffHeader {
        TiffHeader {
            first_ifd_offset: 8,
            is_bigtiff: false,
            byte_order: ByteOrder::LittleEndian,
        }
    }

    // -------------------------------------------------------------------------
    // parse_u64_array tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_parse_u64_array_short() {
        // Four little-endian SHORTs: 100, 200, 300, 400
        let raw: [u8; 8] = [0x64, 0x00, 0xC8, 0x00, 0x2C, 0x01, 0x90, 0x01];

        let parsed = parse_u64_array(&raw, 4, FieldType::Short, ByteOrder::LittleEndian);

        assert_eq!(parsed, vec![100, 200, 300, 400]);
    }

    #[test]
    fn test_parse_u64_array_long() {
        // Three little-endian LONGs: 1000, 2000, 3000
        let raw: [u8; 12] = [
            0xE8, 0x03, 0x00, 0x00, // 1000
            0xD0, 0x07, 0x00, 0x00, // 2000
            0xB8, 0x0B, 0x00, 0x00, // 3000
        ];

        let parsed = parse_u64_array(&raw, 3, FieldType::Long, ByteOrder::LittleEndian);

        assert_eq!(parsed, vec![1000, 2000, 3000]);
    }

    #[test]
    fn test_parse_u64_array_long8() {
        // Two little-endian LONG8s whose value only fits in 64 bits
        let raw: [u8; 16] = [
            0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, // 4GB
            0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, // 8GB
        ];

        let parsed = parse_u64_array(&raw, 2, FieldType::Long8, ByteOrder::LittleEndian);

        assert_eq!(parsed, vec![0x0000_0001_0000_0000, 0x0000_0002_0000_0000]);
    }

    #[test]
    fn test_parse_u64_array_big_endian() {
        // Two big-endian LONGs: 1000, 2000
        let raw: [u8; 8] = [
            0x00, 0x00, 0x03, 0xE8, // 1000
            0x00, 0x00, 0x07, 0xD0, // 2000
        ];

        let parsed = parse_u64_array(&raw, 2, FieldType::Long, ByteOrder::BigEndian);

        assert_eq!(parsed, vec![1000, 2000]);
    }

    #[test]
    fn test_parse_u32_array() {
        // Two little-endian SHORTs: 256, 512
        let raw: [u8; 4] = [0x00, 0x01, 0x00, 0x02];

        let parsed = parse_u32_array(&raw, 2, FieldType::Short, ByteOrder::LittleEndian);

        assert_eq!(parsed, vec![256, 512]);
    }

    // -------------------------------------------------------------------------
    // ValueReader async tests
    // -------------------------------------------------------------------------

    #[tokio::test]
    async fn test_value_reader_read_bytes_inline() {
        let source = MockReader::new(vec![0; 100]);
        let header = make_tiff_header();
        let values = ValueReader::new(&source, &header);

        // One SHORT occupies 2 bytes, so it lives inside the 4-byte value field.
        let inline_short = IfdEntry {
            tag_id: 256,
            field_type: Some(FieldType::Short),
            field_type_raw: 3,
            count: 1,
            value_offset_bytes: vec![0x00, 0x04, 0x00, 0x00], // 1024
            is_inline: true,
        };

        let got = values.read_bytes(&inline_short).await.unwrap();

        assert_eq!(got.len(), 2);
        assert_eq!(&got[..], &[0x00, 0x04]);
    }

    #[tokio::test]
    async fn test_value_reader_read_bytes_offset() {
        // Place a recognisable 4-byte pattern at offset 50.
        let mut file = vec![0u8; 100];
        file[50..54].copy_from_slice(&[0xAB, 0xCD, 0xEF, 0x12]);

        let source = MockReader::new(file);
        let header = make_tiff_header();
        let values = ValueReader::new(&source, &header);

        // One LONG stored out of line at offset 50.
        let offset_long = IfdEntry {
            tag_id: 256,
            field_type: Some(FieldType::Long),
            field_type_raw: 4,
            count: 1,
            value_offset_bytes: vec![0x32, 0x00, 0x00, 0x00], // offset 50
            is_inline: false,
        };

        let got = values.read_bytes(&offset_long).await.unwrap();

        assert_eq!(got.len(), 4);
        assert_eq!(&got[..], &[0xAB, 0xCD, 0xEF, 0x12]);
    }

    #[tokio::test]
    async fn test_value_reader_read_u64_array() {
        // Five little-endian LONG tile offsets written at file offset 100.
        let tile_offsets: [u32; 5] = [1000, 2000, 3000, 4000, 5000];
        let mut file = vec![0u8; 200];
        for (slot, value) in file[100..120].chunks_exact_mut(4).zip(tile_offsets.iter()) {
            slot.copy_from_slice(&value.to_le_bytes());
        }

        let source = MockReader::new(file);
        let header = make_tiff_header();
        let values = ValueReader::new(&source, &header);

        let tile_offsets_entry = IfdEntry {
            tag_id: 324, // TileOffsets
            field_type: Some(FieldType::Long),
            field_type_raw: 4,
            count: 5,
            value_offset_bytes: vec![0x64, 0x00, 0x00, 0x00], // offset 100
            is_inline: false,
        };

        let got = values.read_u64_array(&tile_offsets_entry).await.unwrap();

        assert_eq!(got, vec![1000, 2000, 3000, 4000, 5000]);
    }

    #[tokio::test]
    async fn test_value_reader_read_string() {
        // NUL-terminated description stored at file offset 20.
        let desc = b"Aperio Image\0";
        let mut file = vec![0u8; 100];
        file[20..20 + desc.len()].copy_from_slice(desc);

        let source = MockReader::new(file);
        let header = make_tiff_header();
        let values = ValueReader::new(&source, &header);

        let description_entry = IfdEntry {
            tag_id: 270, // ImageDescription
            field_type: Some(FieldType::Ascii),
            field_type_raw: 2,
            count: desc.len() as u64,
            value_offset_bytes: vec![0x14, 0x00, 0x00, 0x00], // offset 20
            is_inline: false,
        };

        let got = values.read_string(&description_entry).await.unwrap();

        assert_eq!(got, "Aperio Image");
    }

    #[tokio::test]
    async fn test_value_reader_read_raw_bytes() {
        // Six opaque bytes (JPEG-style markers) stored at file offset 30.
        let mut file = vec![0u8; 100];
        file[30..36].copy_from_slice(&[0xFF, 0xD8, 0xFF, 0xDB, 0xFF, 0xD9]);

        let source = MockReader::new(file);
        let header = make_tiff_header();
        let values = ValueReader::new(&source, &header);

        let jpeg_tables_entry = IfdEntry {
            tag_id: 347, // JPEGTables
            field_type: Some(FieldType::Undefined),
            field_type_raw: 7,
            count: 6,
            value_offset_bytes: vec![0x1E, 0x00, 0x00, 0x00], // offset 30
            is_inline: false,
        };

        let got = values.read_raw_bytes(&jpeg_tables_entry).await.unwrap();

        assert_eq!(got.len(), 6);
        assert_eq!(&got[..], &[0xFF, 0xD8, 0xFF, 0xDB, 0xFF, 0xD9]);
    }

    #[tokio::test]
    async fn test_value_reader_inline_u32() {
        let source = MockReader::new(vec![0; 100]);
        let header = make_tiff_header();
        let values = ValueReader::new(&source, &header);

        // A single LONG fits in the value field, so no fetch is needed.
        let inline_long = IfdEntry {
            tag_id: 256,
            field_type: Some(FieldType::Long),
            field_type_raw: 4,
            count: 1,
            value_offset_bytes: vec![0x50, 0xC3, 0x00, 0x00], // 50000
            is_inline: true,
        };

        let got = values.read_u32(&inline_long).await.unwrap();

        assert_eq!(got, 50000);
    }

    #[tokio::test]
    async fn test_value_reader_error_unknown_type() {
        let source = MockReader::new(vec![0; 100]);
        let header = make_tiff_header();
        let values = ValueReader::new(&source, &header);

        // Raw type code 99 has no parsed field type.
        let unknown_type_entry = IfdEntry {
            tag_id: 256,
            field_type: None,
            field_type_raw: 99,
            count: 1,
            value_offset_bytes: vec![0x00, 0x00, 0x00, 0x00],
            is_inline: false,
        };

        let outcome = values.read_bytes(&unknown_type_entry).await;

        assert!(matches!(outcome, Err(TiffError::UnknownFieldType(99))));
    }
}