oxidize_pdf/graphics/
image.rs

//! Image support for PDF generation
//!
//! Currently provides loaders for:
//! - JPEG images
//! - PNG images
//! - TIFF images
5
6use crate::objects::{Dictionary, Object};
7use crate::{PdfError, Result};
8use std::fs::File;
9use std::io::Read;
10use std::path::Path;
11
12/// Represents an image that can be embedded in a PDF
13#[derive(Debug, Clone)]
14pub struct Image {
15    /// Image data
16    data: Vec<u8>,
17    /// Image format
18    format: ImageFormat,
19    /// Width in pixels
20    width: u32,
21    /// Height in pixels
22    height: u32,
23    /// Color space
24    color_space: ColorSpace,
25    /// Bits per component
26    bits_per_component: u8,
27}
28
/// Supported image formats
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so that a format can be
/// used as a key in hash-based collections and compared with full equality
/// semantics.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ImageFormat {
    /// JPEG format
    Jpeg,
    /// PNG format
    Png,
    /// TIFF format
    Tiff,
}
39
/// Color spaces for images
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so that a color space can
/// be used as a key in hash-based collections and compared with full equality
/// semantics.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ColorSpace {
    /// Grayscale
    DeviceGray,
    /// RGB color
    DeviceRGB,
    /// CMYK color
    DeviceCMYK,
}
50
51impl Image {
52    /// Load a JPEG image from a file
53    pub fn from_jpeg_file<P: AsRef<Path>>(path: P) -> Result<Self> {
54        let mut file = File::open(path)?;
55        let mut data = Vec::new();
56        file.read_to_end(&mut data)?;
57        Self::from_jpeg_data(data)
58    }
59
60    /// Create an image from JPEG data
61    pub fn from_jpeg_data(data: Vec<u8>) -> Result<Self> {
62        // Parse JPEG header to get dimensions and color info
63        let (width, height, color_space, bits_per_component) = parse_jpeg_header(&data)?;
64
65        Ok(Image {
66            data,
67            format: ImageFormat::Jpeg,
68            width,
69            height,
70            color_space,
71            bits_per_component,
72        })
73    }
74
75    /// Load a PNG image from a file
76    pub fn from_png_file<P: AsRef<Path>>(path: P) -> Result<Self> {
77        let mut file = File::open(path)?;
78        let mut data = Vec::new();
79        file.read_to_end(&mut data)?;
80        Self::from_png_data(data)
81    }
82
83    /// Create an image from PNG data
84    pub fn from_png_data(data: Vec<u8>) -> Result<Self> {
85        // Parse PNG header to get dimensions and color info
86        let (width, height, color_space, bits_per_component) = parse_png_header(&data)?;
87
88        Ok(Image {
89            data,
90            format: ImageFormat::Png,
91            width,
92            height,
93            color_space,
94            bits_per_component,
95        })
96    }
97
98    /// Load a TIFF image from a file
99    pub fn from_tiff_file<P: AsRef<Path>>(path: P) -> Result<Self> {
100        let mut file = File::open(path)?;
101        let mut data = Vec::new();
102        file.read_to_end(&mut data)?;
103        Self::from_tiff_data(data)
104    }
105
106    /// Create an image from TIFF data
107    pub fn from_tiff_data(data: Vec<u8>) -> Result<Self> {
108        // Parse TIFF header to get dimensions and color info
109        let (width, height, color_space, bits_per_component) = parse_tiff_header(&data)?;
110
111        Ok(Image {
112            data,
113            format: ImageFormat::Tiff,
114            width,
115            height,
116            color_space,
117            bits_per_component,
118        })
119    }
120
121    /// Get image width in pixels
122    pub fn width(&self) -> u32 {
123        self.width
124    }
125
126    /// Get image height in pixels
127    pub fn height(&self) -> u32 {
128        self.height
129    }
130
131    /// Get image data
132    pub fn data(&self) -> &[u8] {
133        &self.data
134    }
135
136    /// Get image format
137    pub fn format(&self) -> ImageFormat {
138        self.format
139    }
140
141    /// Convert to PDF XObject
142    pub fn to_pdf_object(&self) -> Object {
143        let mut dict = Dictionary::new();
144
145        // Required entries for image XObject
146        dict.set("Type", Object::Name("XObject".to_string()));
147        dict.set("Subtype", Object::Name("Image".to_string()));
148        dict.set("Width", Object::Integer(self.width as i64));
149        dict.set("Height", Object::Integer(self.height as i64));
150
151        // Color space
152        let color_space_name = match self.color_space {
153            ColorSpace::DeviceGray => "DeviceGray",
154            ColorSpace::DeviceRGB => "DeviceRGB",
155            ColorSpace::DeviceCMYK => "DeviceCMYK",
156        };
157        dict.set("ColorSpace", Object::Name(color_space_name.to_string()));
158
159        // Bits per component
160        dict.set(
161            "BitsPerComponent",
162            Object::Integer(self.bits_per_component as i64),
163        );
164
165        // Filter based on image format
166        match self.format {
167            ImageFormat::Jpeg => {
168                dict.set("Filter", Object::Name("DCTDecode".to_string()));
169            }
170            ImageFormat::Png => {
171                dict.set("Filter", Object::Name("FlateDecode".to_string()));
172            }
173            ImageFormat::Tiff => {
174                // TIFF can use various filters, but commonly LZW or FlateDecode
175                dict.set("Filter", Object::Name("FlateDecode".to_string()));
176            }
177        }
178
179        // Create stream with image data
180        Object::Stream(dict, self.data.clone())
181    }
182}
183
184/// Parse JPEG header to extract image information
185fn parse_jpeg_header(data: &[u8]) -> Result<(u32, u32, ColorSpace, u8)> {
186    if data.len() < 2 || data[0] != 0xFF || data[1] != 0xD8 {
187        return Err(PdfError::InvalidImage("Not a valid JPEG file".to_string()));
188    }
189
190    let mut pos = 2;
191    let mut width = 0;
192    let mut height = 0;
193    let mut components = 0;
194
195    while pos < data.len() - 1 {
196        if data[pos] != 0xFF {
197            return Err(PdfError::InvalidImage("Invalid JPEG marker".to_string()));
198        }
199
200        let marker = data[pos + 1];
201        pos += 2;
202
203        // Skip padding bytes
204        if marker == 0xFF {
205            continue;
206        }
207
208        // Check for SOF markers (Start of Frame)
209        if (0xC0..=0xCF).contains(&marker) && marker != 0xC4 && marker != 0xC8 && marker != 0xCC {
210            // This is a SOF marker
211            if pos + 7 >= data.len() {
212                return Err(PdfError::InvalidImage("Truncated JPEG file".to_string()));
213            }
214
215            // Skip length
216            pos += 2;
217
218            // Skip precision
219            pos += 1;
220
221            // Read height and width
222            height = ((data[pos] as u32) << 8) | (data[pos + 1] as u32);
223            pos += 2;
224            width = ((data[pos] as u32) << 8) | (data[pos + 1] as u32);
225            pos += 2;
226
227            // Read number of components
228            components = data[pos];
229            break;
230        } else if marker == 0xD9 {
231            // End of image
232            break;
233        } else if marker == 0xD8 || (0xD0..=0xD7).contains(&marker) {
234            // No length field for these markers
235            continue;
236        } else {
237            // Read length and skip segment
238            if pos + 1 >= data.len() {
239                return Err(PdfError::InvalidImage("Truncated JPEG file".to_string()));
240            }
241            let length = ((data[pos] as usize) << 8) | (data[pos + 1] as usize);
242            pos += length;
243        }
244    }
245
246    if width == 0 || height == 0 {
247        return Err(PdfError::InvalidImage(
248            "Could not find image dimensions".to_string(),
249        ));
250    }
251
252    let color_space = match components {
253        1 => ColorSpace::DeviceGray,
254        3 => ColorSpace::DeviceRGB,
255        4 => ColorSpace::DeviceCMYK,
256        _ => {
257            return Err(PdfError::InvalidImage(format!(
258                "Unsupported number of components: {components}"
259            )))
260        }
261    };
262
263    Ok((width, height, color_space, 8)) // JPEG typically uses 8 bits per component
264}
265
266/// Parse PNG header to extract image information
267fn parse_png_header(data: &[u8]) -> Result<(u32, u32, ColorSpace, u8)> {
268    // PNG signature: 8 bytes
269    if data.len() < 8 || &data[0..8] != b"\x89PNG\r\n\x1a\n" {
270        return Err(PdfError::InvalidImage("Not a valid PNG file".to_string()));
271    }
272
273    // Find IHDR chunk (should be first chunk after signature)
274    let mut pos = 8;
275
276    while pos + 8 < data.len() {
277        // Read chunk length (4 bytes, big-endian)
278        let chunk_length =
279            u32::from_be_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]]) as usize;
280
281        // Read chunk type (4 bytes)
282        let chunk_type = &data[pos + 4..pos + 8];
283
284        if chunk_type == b"IHDR" {
285            // IHDR chunk found
286            if pos + 8 + chunk_length > data.len() || chunk_length < 13 {
287                return Err(PdfError::InvalidImage("Invalid PNG IHDR chunk".to_string()));
288            }
289
290            let ihdr_data = &data[pos + 8..pos + 8 + chunk_length];
291
292            // Parse IHDR data
293            let width =
294                u32::from_be_bytes([ihdr_data[0], ihdr_data[1], ihdr_data[2], ihdr_data[3]]);
295
296            let height =
297                u32::from_be_bytes([ihdr_data[4], ihdr_data[5], ihdr_data[6], ihdr_data[7]]);
298
299            let bit_depth = ihdr_data[8];
300            let color_type = ihdr_data[9];
301
302            // Map PNG color types to PDF color spaces
303            let color_space = match color_type {
304                0 => ColorSpace::DeviceGray, // Grayscale
305                2 => ColorSpace::DeviceRGB,  // RGB
306                3 => ColorSpace::DeviceRGB,  // Palette (treated as RGB)
307                4 => ColorSpace::DeviceGray, // Grayscale + Alpha
308                6 => ColorSpace::DeviceRGB,  // RGB + Alpha
309                _ => {
310                    return Err(PdfError::InvalidImage(format!(
311                        "Unsupported PNG color type: {color_type}"
312                    )))
313                }
314            };
315
316            return Ok((width, height, color_space, bit_depth));
317        }
318
319        // Skip to next chunk
320        pos += 8 + chunk_length + 4; // header + data + CRC
321    }
322
323    Err(PdfError::InvalidImage(
324        "PNG IHDR chunk not found".to_string(),
325    ))
326}
327
328/// Parse TIFF header to extract image information
329fn parse_tiff_header(data: &[u8]) -> Result<(u32, u32, ColorSpace, u8)> {
330    if data.len() < 8 {
331        return Err(PdfError::InvalidImage(
332            "Invalid TIFF file: too short".to_string(),
333        ));
334    }
335
336    // Check byte order (first 2 bytes)
337    let (is_little_endian, offset) = if &data[0..2] == b"II" {
338        (true, 2) // Little endian
339    } else if &data[0..2] == b"MM" {
340        (false, 2) // Big endian
341    } else {
342        return Err(PdfError::InvalidImage(
343            "Invalid TIFF byte order".to_string(),
344        ));
345    };
346
347    // Check magic number (should be 42)
348    let magic = if is_little_endian {
349        u16::from_le_bytes([data[offset], data[offset + 1]])
350    } else {
351        u16::from_be_bytes([data[offset], data[offset + 1]])
352    };
353
354    if magic != 42 {
355        return Err(PdfError::InvalidImage(
356            "Invalid TIFF magic number".to_string(),
357        ));
358    }
359
360    // Get offset to first IFD (Image File Directory)
361    let ifd_offset = if is_little_endian {
362        u32::from_le_bytes([
363            data[offset + 2],
364            data[offset + 3],
365            data[offset + 4],
366            data[offset + 5],
367        ])
368    } else {
369        u32::from_be_bytes([
370            data[offset + 2],
371            data[offset + 3],
372            data[offset + 4],
373            data[offset + 5],
374        ])
375    } as usize;
376
377    if ifd_offset + 2 > data.len() {
378        return Err(PdfError::InvalidImage(
379            "Invalid TIFF IFD offset".to_string(),
380        ));
381    }
382
383    // Read number of directory entries
384    let num_entries = if is_little_endian {
385        u16::from_le_bytes([data[ifd_offset], data[ifd_offset + 1]])
386    } else {
387        u16::from_be_bytes([data[ifd_offset], data[ifd_offset + 1]])
388    };
389
390    let mut width = 0u32;
391    let mut height = 0u32;
392    let mut bits_per_sample = 8u16;
393    let mut photometric_interpretation = 0u16;
394
395    // Read directory entries
396    for i in 0..num_entries {
397        let entry_offset = ifd_offset + 2 + (i as usize * 12);
398
399        if entry_offset + 12 > data.len() {
400            break;
401        }
402
403        let tag = if is_little_endian {
404            u16::from_le_bytes([data[entry_offset], data[entry_offset + 1]])
405        } else {
406            u16::from_be_bytes([data[entry_offset], data[entry_offset + 1]])
407        };
408
409        let value_offset = entry_offset + 8;
410
411        match tag {
412            256 => {
413                // ImageWidth
414                width = if is_little_endian {
415                    u32::from_le_bytes([
416                        data[value_offset],
417                        data[value_offset + 1],
418                        data[value_offset + 2],
419                        data[value_offset + 3],
420                    ])
421                } else {
422                    u32::from_be_bytes([
423                        data[value_offset],
424                        data[value_offset + 1],
425                        data[value_offset + 2],
426                        data[value_offset + 3],
427                    ])
428                };
429            }
430            257 => {
431                // ImageHeight
432                height = if is_little_endian {
433                    u32::from_le_bytes([
434                        data[value_offset],
435                        data[value_offset + 1],
436                        data[value_offset + 2],
437                        data[value_offset + 3],
438                    ])
439                } else {
440                    u32::from_be_bytes([
441                        data[value_offset],
442                        data[value_offset + 1],
443                        data[value_offset + 2],
444                        data[value_offset + 3],
445                    ])
446                };
447            }
448            258 => {
449                // BitsPerSample
450                bits_per_sample = if is_little_endian {
451                    u16::from_le_bytes([data[value_offset], data[value_offset + 1]])
452                } else {
453                    u16::from_be_bytes([data[value_offset], data[value_offset + 1]])
454                };
455            }
456            262 => {
457                // PhotometricInterpretation
458                photometric_interpretation = if is_little_endian {
459                    u16::from_le_bytes([data[value_offset], data[value_offset + 1]])
460                } else {
461                    u16::from_be_bytes([data[value_offset], data[value_offset + 1]])
462                };
463            }
464            _ => {} // Skip unknown tags
465        }
466    }
467
468    if width == 0 || height == 0 {
469        return Err(PdfError::InvalidImage(
470            "TIFF dimensions not found".to_string(),
471        ));
472    }
473
474    // Map TIFF photometric interpretation to PDF color space
475    let color_space = match photometric_interpretation {
476        0 | 1 => ColorSpace::DeviceGray, // White is zero | Black is zero
477        2 => ColorSpace::DeviceRGB,      // RGB
478        5 => ColorSpace::DeviceCMYK,     // CMYK
479        _ => ColorSpace::DeviceRGB,      // Default to RGB
480    };
481
482    Ok((width, height, color_space, bits_per_sample as u8))
483}
484
#[cfg(test)]
mod tests {
    use super::*;

    /// SOI followed directly by an SOF0 segment yields the frame size, RGB and 8 bits.
    #[test]
    fn test_parse_jpeg_header() {
        let bytes: [u8; 12] = [
            0xFF, 0xD8, // SOI marker
            0xFF, 0xC0, // SOF0 marker
            0x00, 0x11, // Length (17 bytes)
            0x08, // Precision (8 bits)
            0x00, 0x64, // Height (100)
            0x00, 0xC8, // Width (200)
            0x03, // Components (3 = RGB)
        ];

        let (width, height, color_space, bits) =
            parse_jpeg_header(&bytes).expect("minimal JPEG header should parse");

        assert_eq!((width, height), (200, 100));
        assert_eq!(color_space, ColorSpace::DeviceRGB);
        assert_eq!(bits, 8);
    }

    /// Data without the SOI marker is rejected.
    #[test]
    fn test_invalid_jpeg() {
        assert!(parse_jpeg_header(&[0x00u8, 0x00]).is_err());
    }

    /// Signature plus a single IHDR chunk yields the size, RGB and the bit depth.
    #[test]
    fn test_parse_png_header() {
        let bytes: [u8; 33] = [
            0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature
            0x00, 0x00, 0x00, 0x0D, // IHDR chunk length (13)
            0x49, 0x48, 0x44, 0x52, // IHDR chunk type
            0x00, 0x00, 0x00, 0x64, // Width (100)
            0x00, 0x00, 0x00, 0x64, // Height (100)
            0x08, // Bit depth (8)
            0x02, // Color type (2 = RGB)
            0x00, // Compression method
            0x00, // Filter method
            0x00, // Interlace method
            0x00, 0x00, 0x00, 0x00, // CRC (placeholder, not verified by the parser)
        ];

        let (width, height, color_space, bits) =
            parse_png_header(&bytes).expect("minimal PNG header should parse");

        assert_eq!((width, height), (100, 100));
        assert_eq!(color_space, ColorSpace::DeviceRGB);
        assert_eq!(bits, 8);
    }

    /// Data without the PNG signature is rejected.
    #[test]
    fn test_invalid_png() {
        assert!(parse_png_header(&[0x00u8, 0x00]).is_err());
    }

    /// Little-endian directory with width, height and bit depth; gray by default.
    #[test]
    fn test_parse_tiff_header_little_endian() {
        let bytes: [u8; 50] = [
            0x49, 0x49, // Little endian byte order
            0x2A, 0x00, // Magic number (42)
            0x08, 0x00, 0x00, 0x00, // Offset to first IFD
            0x03, 0x00, // Number of directory entries
            // ImageWidth tag (256)
            0x00, 0x01, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
            // ImageHeight tag (257)
            0x01, 0x01, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
            // BitsPerSample tag (258)
            0x02, 0x01, 0x03, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
            // Next IFD offset (0 = none)
            0x00, 0x00, 0x00, 0x00,
        ];

        let (width, height, color_space, bits) =
            parse_tiff_header(&bytes).expect("little-endian TIFF header should parse");

        assert_eq!((width, height), (100, 100));
        assert_eq!(color_space, ColorSpace::DeviceGray);
        assert_eq!(bits, 8);
    }

    /// Big-endian directory with width, height and bit depth; gray by default.
    #[test]
    fn test_parse_tiff_header_big_endian() {
        let bytes: [u8; 50] = [
            0x4D, 0x4D, // Big endian byte order
            0x00, 0x2A, // Magic number (42)
            0x00, 0x00, 0x00, 0x08, // Offset to first IFD
            0x00, 0x03, // Number of directory entries
            // ImageWidth tag (256)
            0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x64,
            // ImageHeight tag (257)
            0x01, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x64,
            // BitsPerSample tag (258)
            0x01, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x08, 0x00, 0x00,
            // Next IFD offset (0 = none)
            0x00, 0x00, 0x00, 0x00,
        ];

        let (width, height, color_space, bits) =
            parse_tiff_header(&bytes).expect("big-endian TIFF header should parse");

        assert_eq!((width, height), (100, 100));
        assert_eq!(color_space, ColorSpace::DeviceGray);
        assert_eq!(bits, 8);
    }

    /// Data too short to hold a TIFF header is rejected.
    #[test]
    fn test_invalid_tiff() {
        assert!(parse_tiff_header(&[0x00u8, 0x00]).is_err());
    }

    /// Each format equals itself and differs from the others.
    #[test]
    fn test_image_format_enum() {
        for format in [ImageFormat::Jpeg, ImageFormat::Png, ImageFormat::Tiff] {
            assert_eq!(format, format);
        }
        assert_ne!(ImageFormat::Jpeg, ImageFormat::Png);
    }
}