1use std::io::{Read, Write};
2
3use flate2::Compression;
4use flate2::read::ZlibDecoder;
5use flate2::write::ZlibEncoder;
6
7use crate::error::{PdfError, PdfResult};
8use crate::types::{PdfStream, PdfValue};
9
10pub fn flate_encode(data: &[u8]) -> PdfResult<Vec<u8>> {
14 let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
15 encoder
16 .write_all(data)
17 .map_err(|error| PdfError::Corrupt(format!("flate encode failed: {error}")))?;
18 encoder
19 .finish()
20 .map_err(|error| PdfError::Corrupt(format!("flate encode finalize failed: {error}")))
21}
22
23pub fn decode_stream(stream: &PdfStream) -> PdfResult<Vec<u8>> {
24 let inflated = match stream.dict.get("Filter") {
25 None => stream.data.clone(),
26 Some(PdfValue::Name(name)) if name == "FlateDecode" => inflate(stream.data.as_slice())?,
27 Some(PdfValue::Array(filters)) if filters.len() == 1 => match filters.first() {
28 Some(PdfValue::Name(name)) if name == "FlateDecode" => inflate(stream.data.as_slice())?,
29 _ => {
30 return Err(PdfError::Unsupported(
31 "only a single FlateDecode filter is supported".to_string(),
32 ));
33 }
34 },
35 Some(_) => {
36 return Err(PdfError::Unsupported(
37 "unsupported stream filter configuration".to_string(),
38 ));
39 }
40 };
41
42 apply_predictor(&inflated, stream.dict.get("DecodeParms"))
43}
44
45const MAX_DECOMPRESSED_SIZE: u64 = 256 * 1024 * 1024;
48
49fn inflate(data: &[u8]) -> PdfResult<Vec<u8>> {
50 let decoder = ZlibDecoder::new(data);
51 let mut output = Vec::new();
52 decoder
53 .take(MAX_DECOMPRESSED_SIZE + 1)
54 .read_to_end(&mut output)
55 .map_err(|error| PdfError::Corrupt(format!("failed to decode flate stream: {error}")))?;
56 if output.len() as u64 > MAX_DECOMPRESSED_SIZE {
57 return Err(PdfError::Corrupt(
58 "decompressed stream exceeds maximum allowed size".to_string(),
59 ));
60 }
61 Ok(output)
62}
63
64fn apply_predictor(data: &[u8], decode_parms: Option<&PdfValue>) -> PdfResult<Vec<u8>> {
65 let parms = match decode_parms {
66 None => return Ok(data.to_vec()),
67 Some(PdfValue::Dictionary(dict)) => dict,
68 Some(PdfValue::Null) => return Ok(data.to_vec()),
69 Some(PdfValue::Array(_)) => {
70 return Err(PdfError::Unsupported(
74 "per-filter DecodeParms arrays are not supported".to_string(),
75 ));
76 }
77 Some(_) => {
78 return Err(PdfError::Corrupt(
79 "DecodeParms is not a dictionary".to_string(),
80 ));
81 }
82 };
83
84 let predictor = parms
85 .get("Predictor")
86 .and_then(PdfValue::as_integer)
87 .unwrap_or(1);
88 match predictor {
89 1 => Ok(data.to_vec()),
90 2 => tiff_predictor_decode(data, parms),
91 10..=15 => png_predictor_decode(data, parms),
92 other => Err(PdfError::Unsupported(format!(
93 "predictor {other} is not supported"
94 ))),
95 }
96}
97
98fn tiff_predictor_decode(data: &[u8], parms: &crate::types::PdfDictionary) -> PdfResult<Vec<u8>> {
99 let columns = parms
100 .get("Columns")
101 .and_then(PdfValue::as_integer)
102 .unwrap_or(1) as usize;
103 let colors = parms
104 .get("Colors")
105 .and_then(PdfValue::as_integer)
106 .unwrap_or(1) as usize;
107 let bits_per_component = parms
108 .get("BitsPerComponent")
109 .and_then(PdfValue::as_integer)
110 .unwrap_or(8) as usize;
111
112 if bits_per_component != 8 {
113 return Err(PdfError::Unsupported(format!(
114 "TIFF predictor with BitsPerComponent {bits_per_component} is not supported"
115 )));
116 }
117 if columns == 0 || colors == 0 {
118 return Err(PdfError::Corrupt(
119 "TIFF predictor Columns/Colors must be positive".to_string(),
120 ));
121 }
122 let row_stride = columns * colors;
123 if data.len() % row_stride != 0 {
124 return Err(PdfError::Corrupt(format!(
125 "TIFF predictor row length mismatch: data={} stride={row_stride}",
126 data.len()
127 )));
128 }
129 let mut output = Vec::with_capacity(data.len());
130 for row in data.chunks_exact(row_stride) {
131 for (component_index, byte) in row.iter().enumerate() {
132 if component_index < colors {
133 output.push(*byte);
135 } else {
136 let previous = output[output.len() - colors];
137 output.push(previous.wrapping_add(*byte));
138 }
139 }
140 }
141 Ok(output)
142}
143
144fn png_predictor_decode(data: &[u8], parms: &crate::types::PdfDictionary) -> PdfResult<Vec<u8>> {
145 let columns = parms
146 .get("Columns")
147 .and_then(PdfValue::as_integer)
148 .unwrap_or(1) as usize;
149 let colors = parms
150 .get("Colors")
151 .and_then(PdfValue::as_integer)
152 .unwrap_or(1) as usize;
153 let bits_per_component = parms
154 .get("BitsPerComponent")
155 .and_then(PdfValue::as_integer)
156 .unwrap_or(8) as usize;
157
158 if bits_per_component != 8 {
159 return Err(PdfError::Unsupported(format!(
160 "PNG predictor with BitsPerComponent {bits_per_component} is not supported"
161 )));
162 }
163 if columns == 0 || colors == 0 {
164 return Err(PdfError::Corrupt(
165 "PNG predictor Columns/Colors must be positive".to_string(),
166 ));
167 }
168 let bytes_per_pixel = colors; let row_data_len = columns * bytes_per_pixel;
170 let row_stride = row_data_len + 1; if data.len() % row_stride != 0 {
173 return Err(PdfError::Corrupt(format!(
174 "PNG predictor row length mismatch: data={} stride={row_stride}",
175 data.len()
176 )));
177 }
178 let row_count = data.len() / row_stride;
179 let mut output = Vec::with_capacity(row_count * row_data_len);
180 let mut prev_row = vec![0u8; row_data_len];
181 let mut row = vec![0u8; row_data_len];
182
183 for r in 0..row_count {
184 let base = r * row_stride;
185 let filter = data[base];
186 let src = &data[base + 1..base + row_stride];
187 row.copy_from_slice(src);
188 match filter {
189 0 => {} 1 => {
191 for i in 0..row_data_len {
193 let left = if i >= bytes_per_pixel {
194 row[i - bytes_per_pixel]
195 } else {
196 0
197 };
198 row[i] = row[i].wrapping_add(left);
199 }
200 }
201 2 => {
202 for i in 0..row_data_len {
204 row[i] = row[i].wrapping_add(prev_row[i]);
205 }
206 }
207 3 => {
208 for i in 0..row_data_len {
210 let left = if i >= bytes_per_pixel {
211 row[i - bytes_per_pixel]
212 } else {
213 0
214 };
215 let up = prev_row[i];
216 let avg = ((left as u16 + up as u16) / 2) as u8;
217 row[i] = row[i].wrapping_add(avg);
218 }
219 }
220 4 => {
221 for i in 0..row_data_len {
223 let left = if i >= bytes_per_pixel {
224 row[i - bytes_per_pixel]
225 } else {
226 0
227 };
228 let up = prev_row[i];
229 let up_left = if i >= bytes_per_pixel {
230 prev_row[i - bytes_per_pixel]
231 } else {
232 0
233 };
234 row[i] = row[i].wrapping_add(paeth(left, up, up_left));
235 }
236 }
237 other => {
238 return Err(PdfError::Corrupt(format!(
239 "unknown PNG row filter type {other}"
240 )));
241 }
242 }
243 output.extend_from_slice(&row);
244 prev_row.copy_from_slice(&row);
245 }
246
247 Ok(output)
248}
249
/// Paeth predictor used by PNG filter type 4.
///
/// Forms the estimate `a + b - c` and returns whichever of `a`, `b` or `c`
/// lies closest to it. Ties are resolved in the order `a`, then `b`, then `c`.
fn paeth(a: u8, b: u8, c: u8) -> u8 {
    let (first, second, third) = (i32::from(a), i32::from(b), i32::from(c));
    let estimate = first + second - third;
    let distance = |candidate: i32| (estimate - candidate).abs();
    let (dist_a, dist_b, dist_c) = (distance(first), distance(second), distance(third));

    if dist_a <= dist_b && dist_a <= dist_c {
        return a;
    }
    if dist_b <= dist_c {
        return b;
    }
    c
}
263
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{PdfDictionary, PdfStream, PdfValue};
    use flate2::{Compression, write::ZlibEncoder};
    use std::io::Write;

    /// Bundles a dictionary and raw bytes into a `PdfStream`.
    fn make_stream(dict: PdfDictionary, data: Vec<u8>) -> PdfStream {
        PdfStream { dict, data }
    }

    /// Zlib-compresses `bytes` with the default compression level.
    fn zlib(bytes: &[u8]) -> Vec<u8> {
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(bytes).unwrap();
        encoder.finish().unwrap()
    }

    /// Builds a `FlateDecode` stream over the compressed form of `payload`,
    /// attaching `parms` as `DecodeParms` when provided.
    fn flate_stream(payload: &[u8], parms: Option<PdfDictionary>) -> PdfStream {
        let mut dict = PdfDictionary::new();
        dict.insert("Filter".to_string(), PdfValue::Name("FlateDecode".into()));
        if let Some(parms) = parms {
            dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));
        }
        make_stream(dict, zlib(payload))
    }

    /// Builds a `DecodeParms` dictionary with `Predictor` and `Columns` set.
    fn predictor_parms(predictor: PdfValue, columns: PdfValue) -> PdfDictionary {
        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), predictor);
        parms.insert("Columns".to_string(), columns);
        parms
    }

    #[test]
    fn passthrough_when_no_filter() {
        let stream = make_stream(PdfDictionary::new(), vec![1, 2, 3]);
        assert_eq!(decode_stream(&stream).unwrap(), vec![1, 2, 3]);
    }

    #[test]
    fn inflates_flate_decode() {
        let raw = b"hello world";
        let stream = flate_stream(raw, None);
        assert_eq!(decode_stream(&stream).unwrap(), raw.to_vec());
    }

    #[test]
    fn applies_png_up_predictor() {
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];
        let (first_row, second_row) = original.split_at(4);

        // Row one uses filter type 0 (None); row two uses filter type 2 (Up),
        // i.e. each byte is stored as the difference from the byte above it.
        let mut encoded = vec![0u8];
        encoded.extend_from_slice(first_row);
        encoded.push(2);
        encoded.extend(
            second_row
                .iter()
                .zip(first_row)
                .map(|(value, up)| value.wrapping_sub(*up)),
        );

        let parms = predictor_parms(PdfValue::Integer(12), PdfValue::Integer(4));
        let stream = flate_stream(&encoded, Some(parms));
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn applies_tiff_predictor() {
        let original: [u8; 8] = [10, 20, 30, 40, 15, 22, 33, 44];

        // Per row: the first byte is literal, each later byte is the
        // difference from its left neighbour.
        let mut encoded = Vec::new();
        for row in original.chunks(4) {
            encoded.push(row[0]);
            encoded.extend(row.windows(2).map(|pair| pair[1].wrapping_sub(pair[0])));
        }

        let parms = predictor_parms(PdfValue::Integer(2), PdfValue::Integer(4));
        let stream = flate_stream(&encoded, Some(parms));
        let decoded = decode_stream(&stream).expect("decode");
        assert_eq!(decoded, original.to_vec());
    }

    #[test]
    fn rejects_unsupported_predictor() {
        let mut parms = PdfDictionary::new();
        parms.insert("Predictor".to_string(), PdfValue::Integer(3));
        let mut dict = PdfDictionary::new();
        dict.insert("DecodeParms".to_string(), PdfValue::Dictionary(parms));

        let result = decode_stream(&make_stream(dict, vec![0, 0, 0, 0]));
        match result {
            Err(PdfError::Unsupported(msg)) => {
                assert!(msg.contains("predictor"), "got: {msg}")
            }
            other => panic!("expected Unsupported, got: {other:?}"),
        }
    }
}