Skip to main content

pdf_xfa/
image_bridge.rs

1//! Image embedding for PDF XObjects.
2//!
3//! Converts XFA image data (JPEG/PNG) into PDF Image XObject dictionaries
4//! and provides PDF content stream operators for rendering.
5
6use flate2::write::ZlibEncoder;
7use flate2::Compression;
8use image::GenericImageView;
9use lopdf::{dictionary, Object, ObjectId, Stream};
10use std::io::Write;
/// Image container formats that have a dedicated embedding path in this module.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ImageFormat {
    /// JPEG data; embedded unchanged behind a `DCTDecode` filter.
    Jpeg,
    /// PNG data; decoded and re-compressed behind a `FlateDecode` filter.
    Png,
}
20/// ImageXObjectResult.
21
22#[derive(Debug, Clone)]
23pub struct ImageXObjectResult {
24    /// object_id.
25    pub object_id: ObjectId,
26    /// width.
27    pub width: u32,
28    /// height.
29    pub height: u32,
30}
31/// detect_image_format.
32pub fn detect_image_format(data: &[u8]) -> Option<ImageFormat> {
33    if data.len() >= 3 && data[0] == 0xFF && data[1] == 0xD8 && data[2] == 0xFF {
34        Some(ImageFormat::Jpeg)
35    } else if data.len() >= 8 && data[0..4] == [0x89, 0x50, 0x4E, 0x47] {
36        Some(ImageFormat::Png)
37    } else {
38        None
39    }
40}
41/// embed_jpeg.
42pub fn embed_jpeg(
43    doc: &mut lopdf::Document,
44    jpeg_data: &[u8],
45) -> Result<ImageXObjectResult, String> {
46    let (width, height, components) = parse_jpeg_dimensions(jpeg_data)
47        .map_err(|e| format!("failed to parse JPEG dimensions: {}", e))?;
48
49    let color_space = match components {
50        1 => Object::Name(b"DeviceGray".to_vec()),
51        3 => Object::Name(b"DeviceRGB".to_vec()),
52        4 => Object::Name(b"DeviceCMYK".to_vec()),
53        _ => Object::Name(b"DeviceRGB".to_vec()),
54    };
55
56    let stream_dict = dictionary! {
57        "Type" => "XObject",
58        "Subtype" => "Image",
59        "Width" => Object::Integer(width as i64),
60        "Height" => Object::Integer(height as i64),
61        "BitsPerComponent" => Object::Integer(8),
62        "ColorSpace" => color_space,
63        "Filter" => "DCTDecode",
64        "Length" => Object::Integer(jpeg_data.len() as i64),
65    };
66
67    let stream = Stream::new(stream_dict, jpeg_data.to_vec());
68    let object_id = doc.add_object(Object::Stream(stream));
69
70    Ok(ImageXObjectResult {
71        object_id,
72        width,
73        height,
74    })
75}
76/// embed_png.
77pub fn embed_png(doc: &mut lopdf::Document, png_data: &[u8]) -> Result<ImageXObjectResult, String> {
78    let img = image::load_from_memory_with_format(png_data, image::ImageFormat::Png)
79        .map_err(|e| format!("failed to decode PNG: {}", e))?;
80
81    let (width, height) = img.dimensions();
82    let has_alpha = img.color().has_alpha();
83
84    let (raw_rgb, alpha_channel) = if has_alpha {
85        let rgba = img.to_rgba8();
86        let mut rgb = Vec::with_capacity((width * height * 3) as usize);
87        let mut alpha = Vec::with_capacity((width * height) as usize);
88        for pixel in rgba.pixels() {
89            rgb.extend_from_slice(&pixel.0[..3]);
90            alpha.push(pixel.0[3]);
91        }
92        (rgb, Some(alpha))
93    } else {
94        (img.to_rgb8().into_raw(), None)
95    };
96
97    let compressed_rgb =
98        flate_compress(&raw_rgb).map_err(|e| format!("compression failed: {}", e))?;
99
100    let mut stream_dict = dictionary! {
101        "Type" => "XObject",
102        "Subtype" => "Image",
103        "Width" => Object::Integer(width as i64),
104        "Height" => Object::Integer(height as i64),
105        "BitsPerComponent" => Object::Integer(8),
106        "ColorSpace" => "DeviceRGB",
107        "Filter" => "FlateDecode",
108        "Length" => Object::Integer(compressed_rgb.len() as i64),
109    };
110
111    if let Some(alpha) = alpha_channel {
112        let compressed_alpha =
113            flate_compress(&alpha).map_err(|e| format!("alpha compression failed: {}", e))?;
114        let smask_dict = dictionary! {
115            "Type" => "XObject",
116            "Subtype" => "Image",
117            "Width" => Object::Integer(width as i64),
118            "Height" => Object::Integer(height as i64),
119            "BitsPerComponent" => Object::Integer(8),
120            "ColorSpace" => "DeviceGray",
121            "Filter" => "FlateDecode",
122            "Length" => Object::Integer(compressed_alpha.len() as i64),
123        };
124        let smask_stream = Stream::new(smask_dict, compressed_alpha);
125        let smask_id = doc.add_object(Object::Stream(smask_stream));
126        stream_dict.set("SMask", Object::Reference(smask_id));
127    }
128
129    let stream = Stream::new(stream_dict, compressed_rgb);
130    let object_id = doc.add_object(Object::Stream(stream));
131
132    Ok(ImageXObjectResult {
133        object_id,
134        width,
135        height,
136    })
137}
138/// embed_image.
139pub fn embed_image(
140    doc: &mut lopdf::Document,
141    data: &[u8],
142    mime_type: &str,
143) -> Result<ImageXObjectResult, String> {
144    let format = detect_image_format(data).or(match mime_type {
145        "image/jpeg" | "image/jpg" => Some(ImageFormat::Jpeg),
146        "image/png" => Some(ImageFormat::Png),
147        _ => None,
148    });
149
150    match format {
151        Some(ImageFormat::Jpeg) => embed_jpeg(doc, data),
152        Some(ImageFormat::Png) => embed_png(doc, data),
153        // XFA 3.3 §20.2 allows JPEG, PNG, GIF, BMP, TIFF. For anything the
154        // native embedders don't handle (GIF/BMP/TIFF/etc.), let the `image`
155        // crate decode the bytes and re-encode as PNG before embedding —
156        // this preserves the image at the cost of one decode/encode pass
157        // instead of dropping it entirely (see 01de9ce4's Finance Corp logo
158        // which ships as image/tif).
159        None => embed_via_reencode(doc, data, mime_type),
160    }
161}
162
163fn embed_via_reencode(
164    doc: &mut lopdf::Document,
165    data: &[u8],
166    mime_type: &str,
167) -> Result<ImageXObjectResult, String> {
168    let img = image::load_from_memory(data)
169        .map_err(|e| format!("unsupported image format (mime={mime_type}); decode failed: {e}"))?;
170    let mut png_buf: Vec<u8> = Vec::new();
171    img.write_to(
172        &mut std::io::Cursor::new(&mut png_buf),
173        image::ImageFormat::Png,
174    )
175    .map_err(|e| format!("re-encode to PNG failed: {e}"))?;
176    embed_png(doc, &png_buf)
177}
/// Builds the content-stream operators that paint the XObject `name`.
///
/// The output saves the graphics state (`q`), applies a `cm` matrix that
/// scales by `w` x `h` and translates to (`x`, `y`), invokes `/name Do`, and
/// restores the state (`Q`). All numbers are written with two decimals.
pub fn render_image_ops(name: &str, x: f64, y: f64, w: f64, h: f64) -> Vec<u8> {
    format!("q\n{w:.2} 0 0 {h:.2} {x:.2} {y:.2} cm\n/{name} Do\nQ\n").into_bytes()
}
187
/// Scans JPEG marker segments for the first start-of-frame (SOF) header.
///
/// Returns `(width, height, components)` as read from that header.
///
/// # Errors
///
/// Returns a message when `data` does not begin with the SOI marker, when a
/// byte other than `0xFF` appears where a marker is expected, when a segment
/// declares a length below the two bytes of the length field itself, when the
/// SOF header is cut short, or when no SOF marker is found before the data
/// (or the image, at EOI) ends.
fn parse_jpeg_dimensions(data: &[u8]) -> Result<(u32, u32, u8), String> {
    if data.len() < 4 || data[0] != 0xFF || data[1] != 0xD8 {
        return Err("not a valid JPEG".into());
    }

    let mut i = 2;
    while i + 1 < data.len() {
        if data[i] != 0xFF {
            return Err("invalid JPEG marker".into());
        }

        match data[i + 1] {
            // A run of 0xFF bytes is padding before the real marker code.
            0xFF => i += 1,

            // EOI: the image ended without a frame header.
            0xD9 => break,

            // TEM, RST0-RST7 and SOI are standalone markers with no length
            // field; reading the next two bytes as a length would jump past
            // the segments that follow.
            0x01 | 0xD0..=0xD8 => i += 2,

            // SOF0-SOF15, minus DHT (0xC4), JPG (0xC8) and DAC (0xCC).
            0xC0..=0xC3 | 0xC5..=0xC7 | 0xC9..=0xCB | 0xCD..=0xCF => {
                // Layout after the marker: length(2) precision(1) height(2)
                // width(2) components(1).
                let header = data
                    .get(i + 5..i + 10)
                    .ok_or_else(|| String::from("truncated JPEG SOF"))?;
                let height = u32::from(u16::from_be_bytes([header[0], header[1]]));
                let width = u32::from(u16::from_be_bytes([header[2], header[3]]));
                return Ok((width, height, header[4]));
            }

            // Any other marker introduces a length-prefixed segment to skip.
            _ => {
                let len_bytes = match data.get(i + 2..i + 4) {
                    Some(bytes) => bytes,
                    None => break,
                };
                let segment_len = usize::from(u16::from_be_bytes([len_bytes[0], len_bytes[1]]));
                // The length counts its own two bytes, so anything smaller
                // is malformed.
                if segment_len < 2 {
                    return Err("invalid JPEG segment length".into());
                }
                i += 2 + segment_len;
            }
        }
    }

    Err("no SOF marker found in JPEG".into())
}
227
228fn flate_compress(data: &[u8]) -> Result<Vec<u8>, String> {
229    let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
230    encoder
231        .write_all(data)
232        .map_err(|e| format!("compression failed: {}", e))?;
233    encoder
234        .finish()
235        .map_err(|e| format!("compression finalize failed: {}", e))
236}
237
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Creates the empty PDF 1.7 document every embedding test writes into.
    fn fresh_doc() -> lopdf::Document {
        lopdf::Document::with_version("1.7")
    }

    /// Encodes a solid-colour RGBA image of the given size in `format`.
    fn encode_solid(
        width: u32,
        height: u32,
        rgba: [u8; 4],
        format: image::ImageFormat,
    ) -> Vec<u8> {
        let mut out = Cursor::new(Vec::new());
        image::RgbaImage::from_pixel(width, height, image::Rgba(rgba))
            .write_to(&mut out, format)
            .unwrap();
        out.into_inner()
    }

    /// SOI, one SOF0 segment describing a 2x2 three-component image, then EOI.
    fn minimal_jpeg() -> Vec<u8> {
        let mut bytes = vec![
            0xFF, 0xD8, // SOI
            0xFF, 0xC0, // SOF0 marker
            0x00, 0x11, // segment length
            0x08, // sample precision
            0x00, 0x02, // height
            0x00, 0x02, // width
            0x03, // component count
        ];
        for id in 1..=3u8 {
            bytes.extend_from_slice(&[id, 0x11, 0x00]);
        }
        bytes.extend_from_slice(&[0xFF, 0xD9]); // EOI
        bytes
    }

    /// A 2x2 half-transparent red PNG, so the alpha path is exercised.
    fn minimal_png() -> Vec<u8> {
        encode_solid(2, 2, [255, 0, 0, 128], image::ImageFormat::Png)
    }

    #[test]
    fn test_detect_format_jpeg() {
        assert_eq!(
            detect_image_format(&minimal_jpeg()),
            Some(ImageFormat::Jpeg)
        );
    }

    #[test]
    fn test_detect_format_png() {
        assert_eq!(detect_image_format(&minimal_png()), Some(ImageFormat::Png));
    }

    #[test]
    fn test_detect_format_unknown() {
        let unknown = [0x00, 0x01, 0x02];
        assert_eq!(detect_image_format(&unknown), None);
    }

    #[test]
    fn test_embed_jpeg() {
        let mut doc = fresh_doc();
        let embedded = embed_jpeg(&mut doc, &minimal_jpeg()).unwrap();
        assert_eq!(embedded.width, 2);
        assert_eq!(embedded.height, 2);
    }

    #[test]
    fn test_embed_png() {
        let mut doc = fresh_doc();
        let embedded = embed_png(&mut doc, &minimal_png()).unwrap();
        assert_eq!(embedded.width, 2);
        assert_eq!(embedded.height, 2);
    }

    #[test]
    fn test_embed_image_tiff_via_reencode() {
        // XFA templates sometimes ship image/tif data (e.g. 01de9ce4's
        // Finance Corp logo). Without the re-encode fallback these were
        // dropped with "unsupported image format"; with it they are decoded
        // by the `image` crate and re-embedded as PNG.
        let tiff_data = encode_solid(3, 4, [32, 64, 96, 255], image::ImageFormat::Tiff);
        assert_eq!(detect_image_format(&tiff_data), None);

        let mut doc = fresh_doc();
        let embedded = embed_image(&mut doc, &tiff_data, "image/tif")
            .expect("TIFF should be accepted via re-encode fallback");
        assert_eq!(embedded.width, 3);
        assert_eq!(embedded.height, 4);
    }

    #[test]
    fn test_embed_image_gif_via_reencode() {
        // GIF is also allowed by XFA 3.3 §20.2 but has no native embedder,
        // so it must take the re-encode path as well.
        let gif_data = encode_solid(2, 2, [10, 20, 30, 255], image::ImageFormat::Gif);

        let mut doc = fresh_doc();
        let embedded = embed_image(&mut doc, &gif_data, "image/gif")
            .expect("GIF should be accepted via re-encode fallback");
        assert_eq!(embedded.width, 2);
        assert_eq!(embedded.height, 2);
    }

    #[test]
    fn test_render_image_ops() {
        let ops = render_image_ops("Im1", 100.0, 200.0, 50.0, 75.0);
        let content = String::from_utf8_lossy(&ops);
        for expected in [
            "q\n",
            "50.00 0 0 75.00 100.00 200.00 cm\n",
            "/Im1 Do\n",
            "Q\n",
        ] {
            assert!(content.contains(expected));
        }
    }
}