1use crate::error::{JustPdfError, Result};
2use crate::object::{PdfDict, PdfObject};
3use crate::stream;
4use crate::stream::dct;
5
/// Metadata describing an image, gathered from its stream dictionary.
///
/// Values of this type are produced by [`image_info`]; building one does not
/// decode any pixel data.
#[derive(Clone, Debug)]
pub struct ImageInfo {
    /// Image width, taken from the `Width` dictionary entry.
    pub width: u32,
    /// Image height, taken from the `Height` dictionary entry.
    pub height: u32,
    /// Bit depth from `BitsPerComponent`; `1` for image masks and `8` when
    /// the entry is absent.
    pub bits_per_component: u32,
    /// Name found under `ColorSpace`, or a default (`DeviceGray` for masks,
    /// `DeviceRGB` otherwise) when the entry is absent or not a name.
    pub color_space: Vec<u8>,
    /// Number of colour components implied by `color_space`.
    pub num_components: u32,
    /// Name of the stream filter; for a filter array, its last entry.
    pub filter: Option<Vec<u8>>,
    /// `true` when the dictionary sets `ImageMask` to `true`.
    pub is_mask: bool,
    /// `true` when the dictionary contains an `SMask` entry.
    pub has_smask: bool,
}
26
27pub fn image_info(dict: &PdfDict) -> Option<ImageInfo> {
29 let width = dict.get_i64(b"Width")? as u32;
30 let height = dict.get_i64(b"Height")? as u32;
31
32 let is_mask = dict
33 .get(b"ImageMask")
34 .and_then(|o| o.as_bool())
35 .unwrap_or(false);
36
37 let bits_per_component = if is_mask {
38 1
39 } else {
40 dict.get_i64(b"BitsPerComponent").unwrap_or(8) as u32
41 };
42
43 let color_space = dict
44 .get(b"ColorSpace")
45 .and_then(|o| o.as_name())
46 .unwrap_or(if is_mask { b"DeviceGray" } else { b"DeviceRGB" })
47 .to_vec();
48
49 let num_components = match color_space.as_slice() {
50 b"DeviceGray" | b"CalGray" | b"G" => 1,
51 b"DeviceRGB" | b"CalRGB" | b"RGB" => 3,
52 b"DeviceCMYK" | b"CMYK" => 4,
53 _ => 3, };
55
56 let filter = match dict.get(b"Filter") {
57 Some(PdfObject::Name(n)) => Some(n.clone()),
58 Some(PdfObject::Array(arr)) => arr.last().and_then(|o| o.as_name()).map(|n| n.to_vec()),
59 _ => None,
60 };
61
62 let has_smask = dict.get(b"SMask").is_some();
63
64 Some(ImageInfo {
65 width,
66 height,
67 bits_per_component,
68 color_space,
69 num_components,
70 filter,
71 is_mask,
72 has_smask,
73 })
74}
75
76#[derive(Debug, Clone)]
78pub struct DecodedImage {
79 pub width: u32,
80 pub height: u32,
81 pub components: u32,
83 pub bpc: u32,
85 pub data: Vec<u8>,
87 pub source_format: ImageFormat,
89}
90
/// Identifies the decoding path that produced a [`DecodedImage`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ImageFormat {
    /// Data obtained through the generic stream decoder.
    Raw,
    /// Data decoded from a `DCTDecode` / `DCT` stream.
    Jpeg,
    /// Data decoded from a `JPXDecode` stream.
    Jpeg2000,
    /// Data decoded from a `JBIG2Decode` stream.
    Jbig2,
    /// Data decoded from a `CCITTFaxDecode` / `CCF` stream.
    CcittFax,
}
105
106pub fn decode_image(raw_data: &[u8], dict: &PdfDict) -> Result<DecodedImage> {
108 let info = image_info(dict).ok_or_else(|| JustPdfError::StreamDecode {
109 filter: "image".into(),
110 detail: "missing Width or Height in image dict".into(),
111 })?;
112
113 let filter = info.filter.as_deref();
114
115 match filter {
116 Some(b"DCTDecode") | Some(b"DCT") => {
117 let decoded = dct::decode(raw_data)?;
118 Ok(DecodedImage {
119 width: decoded.width,
120 height: decoded.height,
121 components: decoded.color_type.components() as u32,
122 bpc: 8,
123 data: decoded.data,
124 source_format: ImageFormat::Jpeg,
125 })
126 }
127 Some(b"JPXDecode") => {
128 let jp2_image = justjp2::decode(raw_data).map_err(|e| JustPdfError::StreamDecode {
129 filter: "JPXDecode".into(),
130 detail: format!("JPEG2000 decode error: {e}"),
131 })?;
132 let num_comp = jp2_image.components.len() as u32;
133 if num_comp == 0 || jp2_image.components[0].data.is_empty() {
134 return Err(JustPdfError::StreamDecode {
135 filter: "JPXDecode".into(),
136 detail: "empty JPEG2000 image".into(),
137 });
138 }
139 let w = jp2_image.width;
140 let h = jp2_image.height;
141 let pixel_count = (w * h) as usize;
142 let mut data = Vec::with_capacity(pixel_count * num_comp as usize);
144 for i in 0..pixel_count {
145 for comp in &jp2_image.components {
146 let val = comp.data.get(i).copied().unwrap_or(0);
147 data.push(val.clamp(0, 255) as u8);
148 }
149 }
150 Ok(DecodedImage {
151 width: w,
152 height: h,
153 components: num_comp,
154 bpc: 8,
155 data,
156 source_format: ImageFormat::Jpeg2000,
157 })
158 }
159 Some(b"JBIG2Decode") => {
160 let pages = justbig2::decode_embedded(raw_data).map_err(|e| {
161 JustPdfError::StreamDecode {
162 filter: "JBIG2Decode".into(),
163 detail: format!("JBIG2 decode error: {e}"),
164 }
165 })?;
166 let page = pages.into_iter().next().ok_or_else(|| {
167 JustPdfError::StreamDecode {
168 filter: "JBIG2Decode".into(),
169 detail: "no pages decoded from JBIG2 stream".into(),
170 }
171 })?;
172 let w = page.width;
176 let h = page.height;
177 let pixel_count = (w * h) as usize;
178 let mut data = Vec::with_capacity(pixel_count);
179 for y in 0..h {
180 for x in 0..w {
181 let byte_idx = (y * page.stride + x / 8) as usize;
182 let bit_idx = 7 - (x % 8);
183 let bit = if byte_idx < page.data.len() {
184 (page.data[byte_idx] >> bit_idx) & 1
185 } else {
186 0
187 };
188 data.push(if bit != 0 { 0x00 } else { 0xFF });
189 }
190 }
191 Ok(DecodedImage {
192 width: w,
193 height: h,
194 components: 1,
195 bpc: 8,
196 data,
197 source_format: ImageFormat::Jbig2,
198 })
199 }
200 Some(b"CCITTFaxDecode") | Some(b"CCF") => {
201 let decoded = stream::decode_stream(raw_data, dict)?;
204 Ok(DecodedImage {
205 width: info.width,
206 height: info.height,
207 components: 1,
208 bpc: 8,
209 data: decoded,
210 source_format: ImageFormat::CcittFax,
211 })
212 }
213 _ => {
214 let decoded = stream::decode_stream(raw_data, dict)?;
216 Ok(DecodedImage {
217 width: info.width,
218 height: info.height,
219 components: info.num_components,
220 bpc: info.bits_per_component,
221 data: decoded,
222 source_format: ImageFormat::Raw,
223 })
224 }
225 }
226}
227
228pub fn extract_jpeg_bytes(raw_data: &[u8], dict: &PdfDict) -> Result<Vec<u8>> {
230 let filter = match dict.get(b"Filter") {
231 Some(PdfObject::Name(n)) => n.clone(),
232 Some(PdfObject::Array(arr)) => arr.last().and_then(|o| o.as_name()).unwrap_or(b"").to_vec(),
233 _ => Vec::new(),
234 };
235
236 if filter == b"DCTDecode" || filter == b"DCT" {
237 Ok(raw_data.to_vec())
238 } else {
239 Err(JustPdfError::StreamDecode {
240 filter: "image".into(),
241 detail: "not a JPEG image".into(),
242 })
243 }
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a dictionary that already carries `Width` and `Height`.
    fn sized_dict(width: i64, height: i64) -> PdfDict {
        let mut dict = PdfDict::new();
        dict.insert(b"Width".to_vec(), PdfObject::Integer(width));
        dict.insert(b"Height".to_vec(), PdfObject::Integer(height));
        dict
    }

    /// Wraps a byte string in a name object.
    fn name(bytes: &[u8]) -> PdfObject {
        PdfObject::Name(bytes.to_vec())
    }

    #[test]
    fn test_image_info_basic() {
        let mut dict = sized_dict(100, 200);
        dict.insert(b"Type".to_vec(), name(b"XObject"));
        dict.insert(b"Subtype".to_vec(), name(b"Image"));
        dict.insert(b"BitsPerComponent".to_vec(), PdfObject::Integer(8));
        dict.insert(b"ColorSpace".to_vec(), name(b"DeviceRGB"));

        let info = image_info(&dict).unwrap();
        assert_eq!(info.width, 100);
        assert_eq!(info.height, 200);
        assert_eq!(info.bits_per_component, 8);
        assert_eq!(info.num_components, 3);
        assert!(!info.is_mask);
    }

    #[test]
    fn test_image_info_mask() {
        let mut dict = sized_dict(50, 50);
        dict.insert(b"ImageMask".to_vec(), PdfObject::Bool(true));

        let info = image_info(&dict).unwrap();
        assert!(info.is_mask);
        assert_eq!(info.bits_per_component, 1);
    }

    #[test]
    fn test_image_info_jpeg() {
        let mut dict = sized_dict(640, 480);
        dict.insert(b"BitsPerComponent".to_vec(), PdfObject::Integer(8));
        dict.insert(b"ColorSpace".to_vec(), name(b"DeviceRGB"));
        dict.insert(b"Filter".to_vec(), name(b"DCTDecode"));

        let info = image_info(&dict).unwrap();
        assert_eq!(info.filter, Some(b"DCTDecode".to_vec()));
    }

    #[test]
    fn test_image_info_missing_dims() {
        // An empty dictionary has neither Width nor Height.
        assert!(image_info(&PdfDict::new()).is_none());
    }

    #[test]
    fn test_image_info_cmyk() {
        let mut dict = sized_dict(100, 100);
        dict.insert(b"ColorSpace".to_vec(), name(b"DeviceCMYK"));

        let info = image_info(&dict).unwrap();
        assert_eq!(info.num_components, 4);
    }
}