Skip to main content

justpdf_core/image/
mod.rs

1use crate::error::{JustPdfError, Result};
2use crate::object::{PdfDict, PdfObject};
3use crate::stream;
4use crate::stream::dct;
5
/// Information about a PDF image XObject.
///
/// All fields are plain data, so values can be cloned and compared with `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImageInfo {
    /// Image width in pixels.
    pub width: u32,
    /// Image height in pixels.
    pub height: u32,
    /// Bits per component (1, 2, 4, 8, 16).
    pub bits_per_component: u32,
    /// Color space name, as raw name bytes (for example `DeviceRGB`).
    pub color_space: Vec<u8>,
    /// Number of color components implied by `color_space`.
    pub num_components: u32,
    /// Name of the filter used to encode the image data, if any.
    pub filter: Option<Vec<u8>>,
    /// Whether this is an image mask.
    pub is_mask: bool,
    /// Whether there is a soft mask.
    pub has_smask: bool,
}
26
27/// Extract image info from an image XObject dictionary.
28pub fn image_info(dict: &PdfDict) -> Option<ImageInfo> {
29    let width = dict.get_i64(b"Width")? as u32;
30    let height = dict.get_i64(b"Height")? as u32;
31
32    let is_mask = dict
33        .get(b"ImageMask")
34        .and_then(|o| o.as_bool())
35        .unwrap_or(false);
36
37    let bits_per_component = if is_mask {
38        1
39    } else {
40        dict.get_i64(b"BitsPerComponent").unwrap_or(8) as u32
41    };
42
43    let color_space = dict
44        .get(b"ColorSpace")
45        .and_then(|o| o.as_name())
46        .unwrap_or(if is_mask { b"DeviceGray" } else { b"DeviceRGB" })
47        .to_vec();
48
49    let num_components = match color_space.as_slice() {
50        b"DeviceGray" | b"CalGray" | b"G" => 1,
51        b"DeviceRGB" | b"CalRGB" | b"RGB" => 3,
52        b"DeviceCMYK" | b"CMYK" => 4,
53        _ => 3, // default assumption
54    };
55
56    let filter = match dict.get(b"Filter") {
57        Some(PdfObject::Name(n)) => Some(n.clone()),
58        Some(PdfObject::Array(arr)) => arr.last().and_then(|o| o.as_name()).map(|n| n.to_vec()),
59        _ => None,
60    };
61
62    let has_smask = dict.get(b"SMask").is_some();
63
64    Some(ImageInfo {
65        width,
66        height,
67        bits_per_component,
68        color_space,
69        num_components,
70        filter,
71        is_mask,
72        has_smask,
73    })
74}
75
76/// Decoded image data with metadata.
77#[derive(Debug, Clone)]
78pub struct DecodedImage {
79    pub width: u32,
80    pub height: u32,
81    /// Number of color components.
82    pub components: u32,
83    /// Bits per component.
84    pub bpc: u32,
85    /// Raw pixel data (decoded).
86    pub data: Vec<u8>,
87    /// The image format the data came from.
88    pub source_format: ImageFormat,
89}
90
/// Identifies which encoding an image's data was stored in before decoding.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ImageFormat {
    /// Uncompressed data, or data behind a general-purpose filter such as FlateDecode.
    Raw,
    /// JPEG, i.e. the `DCTDecode` filter.
    Jpeg,
    /// JPEG2000, i.e. the `JPXDecode` filter.
    Jpeg2000,
    /// JBIG2 bi-level image data.
    Jbig2,
    /// CCITT fax-encoded image data.
    CcittFax,
}
105
106/// Decode an image XObject's stream data.
107pub fn decode_image(raw_data: &[u8], dict: &PdfDict) -> Result<DecodedImage> {
108    let info = image_info(dict).ok_or_else(|| JustPdfError::StreamDecode {
109        filter: "image".into(),
110        detail: "missing Width or Height in image dict".into(),
111    })?;
112
113    let filter = info.filter.as_deref();
114
115    match filter {
116        Some(b"DCTDecode") | Some(b"DCT") => {
117            let decoded = dct::decode(raw_data)?;
118            Ok(DecodedImage {
119                width: decoded.width,
120                height: decoded.height,
121                components: decoded.color_type.components() as u32,
122                bpc: 8,
123                data: decoded.data,
124                source_format: ImageFormat::Jpeg,
125            })
126        }
127        Some(b"JPXDecode") => {
128            let jp2_image = justjp2::decode(raw_data).map_err(|e| JustPdfError::StreamDecode {
129                filter: "JPXDecode".into(),
130                detail: format!("JPEG2000 decode error: {e}"),
131            })?;
132            let num_comp = jp2_image.components.len() as u32;
133            if num_comp == 0 || jp2_image.components[0].data.is_empty() {
134                return Err(JustPdfError::StreamDecode {
135                    filter: "JPXDecode".into(),
136                    detail: "empty JPEG2000 image".into(),
137                });
138            }
139            let w = jp2_image.width;
140            let h = jp2_image.height;
141            let pixel_count = (w * h) as usize;
142            // Interleave components into a flat pixel buffer, clamping i32→u8
143            let mut data = Vec::with_capacity(pixel_count * num_comp as usize);
144            for i in 0..pixel_count {
145                for comp in &jp2_image.components {
146                    let val = comp.data.get(i).copied().unwrap_or(0);
147                    data.push(val.clamp(0, 255) as u8);
148                }
149            }
150            Ok(DecodedImage {
151                width: w,
152                height: h,
153                components: num_comp,
154                bpc: 8,
155                data,
156                source_format: ImageFormat::Jpeg2000,
157            })
158        }
159        Some(b"JBIG2Decode") => {
160            let pages = justbig2::decode_embedded(raw_data).map_err(|e| {
161                JustPdfError::StreamDecode {
162                    filter: "JBIG2Decode".into(),
163                    detail: format!("JBIG2 decode error: {e}"),
164                }
165            })?;
166            let page = pages.into_iter().next().ok_or_else(|| {
167                JustPdfError::StreamDecode {
168                    filter: "JBIG2Decode".into(),
169                    detail: "no pages decoded from JBIG2 stream".into(),
170                }
171            })?;
172            // JBIG2: 1BPP packed, MSB-first, stride-aligned rows
173            // Expand to 1-byte-per-pixel grayscale
174            // JBIG2 convention: 1=black→0x00, 0=white→0xFF
175            let w = page.width;
176            let h = page.height;
177            let pixel_count = (w * h) as usize;
178            let mut data = Vec::with_capacity(pixel_count);
179            for y in 0..h {
180                for x in 0..w {
181                    let byte_idx = (y * page.stride + x / 8) as usize;
182                    let bit_idx = 7 - (x % 8);
183                    let bit = if byte_idx < page.data.len() {
184                        (page.data[byte_idx] >> bit_idx) & 1
185                    } else {
186                        0
187                    };
188                    data.push(if bit != 0 { 0x00 } else { 0xFF });
189                }
190            }
191            Ok(DecodedImage {
192                width: w,
193                height: h,
194                components: 1,
195                bpc: 8,
196                data,
197                source_format: ImageFormat::Jbig2,
198            })
199        }
200        Some(b"CCITTFaxDecode") | Some(b"CCF") => {
201            // CCITT data is decoded by the stream decoder into 1-byte-per-pixel data
202            // (0x00=white, 0xFF=black). We pass it through as 8bpc grayscale.
203            let decoded = stream::decode_stream(raw_data, dict)?;
204            Ok(DecodedImage {
205                width: info.width,
206                height: info.height,
207                components: 1,
208                bpc: 8,
209                data: decoded,
210                source_format: ImageFormat::CcittFax,
211            })
212        }
213        _ => {
214            // Raw or FlateDecode (already decoded by stream decoder)
215            let decoded = stream::decode_stream(raw_data, dict)?;
216            Ok(DecodedImage {
217                width: info.width,
218                height: info.height,
219                components: info.num_components,
220                bpc: info.bits_per_component,
221                data: decoded,
222                source_format: ImageFormat::Raw,
223            })
224        }
225    }
226}
227
228/// Extract raw JPEG bytes from a DCTDecode image stream (passthrough, no decoding).
229pub fn extract_jpeg_bytes(raw_data: &[u8], dict: &PdfDict) -> Result<Vec<u8>> {
230    let filter = match dict.get(b"Filter") {
231        Some(PdfObject::Name(n)) => n.clone(),
232        Some(PdfObject::Array(arr)) => arr.last().and_then(|o| o.as_name()).unwrap_or(b"").to_vec(),
233        _ => Vec::new(),
234    };
235
236    if filter == b"DCTDecode" || filter == b"DCT" {
237        Ok(raw_data.to_vec())
238    } else {
239        Err(JustPdfError::StreamDecode {
240            filter: "image".into(),
241            detail: "not a JPEG image".into(),
242        })
243    }
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a dictionary that contains only `Width` and `Height`.
    fn sized_dict(width: i64, height: i64) -> PdfDict {
        let mut d = PdfDict::new();
        d.insert(b"Width".to_vec(), PdfObject::Integer(width));
        d.insert(b"Height".to_vec(), PdfObject::Integer(height));
        d
    }

    /// Wraps a byte string in a name object.
    fn name(bytes: &[u8]) -> PdfObject {
        PdfObject::Name(bytes.to_vec())
    }

    /// A fully specified RGB image dictionary yields the values it declares.
    #[test]
    fn test_image_info_basic() {
        let mut d = sized_dict(100, 200);
        d.insert(b"Type".to_vec(), name(b"XObject"));
        d.insert(b"Subtype".to_vec(), name(b"Image"));
        d.insert(b"BitsPerComponent".to_vec(), PdfObject::Integer(8));
        d.insert(b"ColorSpace".to_vec(), name(b"DeviceRGB"));

        let parsed = image_info(&d).unwrap();
        assert_eq!(parsed.width, 100);
        assert_eq!(parsed.height, 200);
        assert_eq!(parsed.bits_per_component, 8);
        assert_eq!(parsed.num_components, 3);
        assert!(!parsed.is_mask);
    }

    /// An image mask is flagged as such and reported with one bit per component.
    #[test]
    fn test_image_info_mask() {
        let mut d = sized_dict(50, 50);
        d.insert(b"ImageMask".to_vec(), PdfObject::Bool(true));

        let parsed = image_info(&d).unwrap();
        assert!(parsed.is_mask);
        assert_eq!(parsed.bits_per_component, 1);
    }

    /// A single name-valued `Filter` entry is reported verbatim.
    #[test]
    fn test_image_info_jpeg() {
        let mut d = sized_dict(640, 480);
        d.insert(b"BitsPerComponent".to_vec(), PdfObject::Integer(8));
        d.insert(b"ColorSpace".to_vec(), name(b"DeviceRGB"));
        d.insert(b"Filter".to_vec(), name(b"DCTDecode"));

        let parsed = image_info(&d).unwrap();
        assert_eq!(parsed.filter, Some(b"DCTDecode".to_vec()));
    }

    /// Without dimensions there is nothing to report.
    #[test]
    fn test_image_info_missing_dims() {
        assert!(image_info(&PdfDict::new()).is_none());
    }

    /// `DeviceCMYK` maps to four components.
    #[test]
    fn test_image_info_cmyk() {
        let mut d = sized_dict(100, 100);
        d.insert(b"ColorSpace".to_vec(), name(b"DeviceCMYK"));

        let parsed = image_info(&d).unwrap();
        assert_eq!(parsed.num_components, 4);
    }
}