1#![allow(clippy::many_single_char_names)]
2#![allow(dead_code)] use itertools::Itertools;
5
6use crate as pdf;
7use crate::error::*;
8use crate::object::{Object, Resolve, Stream};
9use crate::primitive::{Primitive, Dictionary};
10use std::convert::{TryFrom, TryInto};
11use std::io::{Read, Write};
12use once_cell::sync::OnceCell;
13use datasize::DataSize;
14
15
16#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
17pub struct LZWFlateParams {
18 #[pdf(key="Predictor", default="1")]
19 pub predictor: i32,
20 #[pdf(key="Colors", default="1")]
21 pub n_components: i32,
22 #[pdf(key="BitsPerComponent", default="8")]
23 pub bits_per_component: i32,
24 #[pdf(key="Columns", default="1")]
25 pub columns: i32,
26 #[pdf(key="EarlyChange", default="1")]
27 pub early_change: i32,
28}
29impl Default for LZWFlateParams {
30 fn default() -> LZWFlateParams {
31 LZWFlateParams {
32 predictor: 1,
33 n_components: 1,
34 bits_per_component: 8,
35 columns: 1,
36 early_change: 1
37 }
38 }
39}
40
41#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
42pub struct DCTDecodeParams {
43 #[pdf(key="ColorTransform")]
49 pub color_transform: Option<i32>,
50}
51
52#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
53pub struct CCITTFaxDecodeParams {
54 #[pdf(key="K", default="0")]
55 pub k: i32,
56
57 #[pdf(key="EndOfLine", default="false")]
58 pub end_of_line: bool,
59
60 #[pdf(key="EncodedByteAlign", default="false")]
61 pub encoded_byte_align: bool,
62
63 #[pdf(key="Columns", default="1728")]
64 pub columns: u32,
65
66 #[pdf(key="Rows", default="0")]
67 pub rows: u32,
68
69 #[pdf(key="EndOfBlock", default="true")]
70 pub end_of_block: bool,
71
72 #[pdf(key="BlackIs1", default="false")]
73 pub black_is_1: bool,
74
75 #[pdf(key="DamagedRowsBeforeError", default="0")]
76 pub damaged_rows_before_error: u32,
77}
78
79#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
80pub struct JBIG2DecodeParams {
81 #[pdf(key="JBIG2Globals")]
82 pub globals: Option<Stream<()>>
83}
84#[derive(Debug, Clone, DataSize, DeepClone)]
85pub enum StreamFilter {
86 ASCIIHexDecode,
87 ASCII85Decode,
88 LZWDecode (LZWFlateParams),
89 FlateDecode (LZWFlateParams),
90 JPXDecode, DCTDecode (DCTDecodeParams),
92 CCITTFaxDecode (CCITTFaxDecodeParams),
93 JBIG2Decode(JBIG2DecodeParams),
94 Crypt,
95 RunLengthDecode
96}
97impl StreamFilter {
98 pub fn from_kind_and_params(kind: &str, params: Dictionary, r: &impl Resolve) -> Result<StreamFilter> {
99 let params = Primitive::Dictionary (params);
100 Ok(
101 match kind {
102 "ASCIIHexDecode" => StreamFilter::ASCIIHexDecode,
103 "ASCII85Decode" => StreamFilter::ASCII85Decode,
104 "LZWDecode" => StreamFilter::LZWDecode (LZWFlateParams::from_primitive(params, r)?),
105 "FlateDecode" => StreamFilter::FlateDecode (LZWFlateParams::from_primitive(params, r)?),
106 "JPXDecode" => StreamFilter::JPXDecode,
107 "DCTDecode" => StreamFilter::DCTDecode (DCTDecodeParams::from_primitive(params, r)?),
108 "CCITTFaxDecode" => StreamFilter::CCITTFaxDecode (CCITTFaxDecodeParams::from_primitive(params, r)?),
109 "JBIG2Decode" => StreamFilter::JBIG2Decode(JBIG2DecodeParams::from_primitive(params, r)?),
110 "Crypt" => StreamFilter::Crypt,
111 "RunLengthDecode" => StreamFilter::RunLengthDecode,
112 ty => bail!("Unrecognized filter type {:?}", ty),
113 }
114 )
115 }
116}
117
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its value.
///
/// Returns `None` for every other byte. Letters past `f`/`F` are rejected;
/// accepting them would yield values above 15 that do not fit in a nibble.
#[inline]
pub fn decode_nibble(c: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly the ASCII hex digits, in either case.
    char::from(c).to_digit(16).map(|digit| digit as u8)
}
127
/// Converts a value in `0..=15` to its lowercase ASCII hexadecimal digit.
///
/// # Panics
///
/// Panics if `c` is greater than 15.
#[inline]
fn encode_nibble(c: u8) -> u8 {
    if c < 10 {
        b'0' + c
    } else if c < 16 {
        b'a' + (c - 10)
    } else {
        unreachable!()
    }
}
136
137
138pub fn decode_hex(data: &[u8]) -> Result<Vec<u8>> {
139 let mut out = Vec::with_capacity(data.len() / 2);
140 let pairs = data.iter().cloned()
141 .take_while(|&b| b != b'>')
142 .filter(|&b| !matches!(b, 0 | 9 | 10 | 12 | 13 | 32))
143 .tuples();
144 for (i, (high, low)) in pairs.enumerate() {
145 if let (Some(low), Some(high)) = (decode_nibble(low), decode_nibble(high)) {
146 out.push(high << 4 | low);
147 } else {
148 return Err(PdfError::HexDecode {pos: i * 2, bytes: [high, low]})
149 }
150 }
151 Ok(out)
152}
153pub fn encode_hex(data: &[u8]) -> Vec<u8> {
154 let mut buf = Vec::with_capacity(data.len() * 2);
155 for &b in data {
156 buf.push(encode_nibble(b >> 4));
157 buf.push(encode_nibble(b & 0xf));
158 }
159 buf
160}
161
/// Maps an ASCII85 symbol (`!` through `u`) to its digit value `0..=84`.
///
/// Returns `None` for any byte outside that range.
#[inline]
fn sym_85(byte: u8) -> Option<u8> {
    const FIRST: u8 = 0x21;
    const LAST: u8 = 0x75;
    byte.checked_sub(FIRST).filter(|&digit| digit <= LAST - FIRST)
}
169
170fn word_85([a, b, c, d, e]: [u8; 5]) -> Option<[u8; 4]> {
171 fn s(b: u8) -> Option<u64> { sym_85(b).map(|n| n as u64) }
172 let (a, b, c, d, e) = (s(a)?, s(b)?, s(c)?, s(d)?, s(e)?);
173 let q = (((a * 85 + b) * 85 + c) * 85 + d) * 85 + e;
174 let r = u32::try_from(q).ok()?;
176 Some(r.to_be_bytes())
177}
178
179pub fn decode_85(data: &[u8]) -> Result<Vec<u8>> {
180 let mut out = Vec::with_capacity((data.len() + 4) / 5 * 4);
181
182 let mut stream = data.iter().cloned()
183 .filter(|&b| !matches!(b, b' ' | b'\n' | b'\r' | b'\t'));
184
185 let mut symbols = stream.by_ref()
186 .take_while(|&b| b != b'~');
187
188 let (tail_len, tail) = loop {
189 match symbols.next() {
190 Some(b'z') => out.extend_from_slice(&[0; 4]),
191 Some(a) => {
192 let (b, c, d, e) = match (symbols.next(), symbols.next(), symbols.next(), symbols.next()) {
193 (Some(b), Some(c), Some(d), Some(e)) => (b, c, d, e),
194 (None, _, _, _) => break (1, [a, b'u', b'u', b'u', b'u']),
195 (Some(b), None, _, _) => break (2, [a, b, b'u', b'u', b'u']),
196 (Some(b), Some(c), None, _) => break (3, [a, b, c, b'u', b'u']),
197 (Some(b), Some(c), Some(d), None) => break (4, [a, b, c, d, b'u']),
198 };
199 out.extend_from_slice(&word_85([a, b, c, d, e]).ok_or(PdfError::Ascii85TailError)?);
200 }
201 None => break (0, [b'u'; 5])
202 }
203 };
204
205 if tail_len > 0 {
206 let last = word_85(tail).ok_or(PdfError::Ascii85TailError)?;
207 out.extend_from_slice(&last[.. tail_len-1]);
208 }
209
210 match (stream.next(), stream.next()) {
211 (Some(b'>'), None) => Ok(out),
212 _ => Err(PdfError::Ascii85TailError)
213 }
214}
215
/// Returns `(n / m, n % m)`.
///
/// # Panics
///
/// Panics if `m` is zero.
#[inline]
fn divmod(n: u32, m: u32) -> (u32, u32) {
    let quotient = n / m;
    let remainder = n % m;
    (quotient, remainder)
}
220
/// Maps an ASCII85 digit value to its symbol by offsetting it from `!`.
#[inline]
fn a85(n: u32) -> u8 {
    let digit = n as u8;
    digit + b'!'
}
225
226#[inline]
227fn base85_chunk(c: [u8; 4]) -> [u8; 5] {
228 let n = u32::from_be_bytes(c);
229 let (n, e) = divmod(n, 85);
230 let (n, d) = divmod(n, 85);
231 let (n, c) = divmod(n, 85);
232 let (a, b) = divmod(n, 85);
233
234 [a85(a), a85(b), a85(c), a85(d), a85(e)]
235}
236
237fn encode_85(data: &[u8]) -> Vec<u8> {
238 let mut buf = Vec::with_capacity((data.len() / 4) * 5 + 10);
239 let mut chunks = data.chunks_exact(4);
240 for chunk in chunks.by_ref() {
241 let c: [u8; 4] = chunk.try_into().unwrap();
242 if c == [0; 4] {
243 buf.push(b'z');
244 } else {
245 buf.extend_from_slice(&base85_chunk(c));
246 }
247 }
248
249 let r = chunks.remainder();
250 if r.len() > 0 {
251 let mut c = [0; 4];
252 c[.. r.len()].copy_from_slice(r);
253 let out = base85_chunk(c);
254 buf.extend_from_slice(&out[.. r.len() + 1]);
255 }
256 buf.extend_from_slice(b"~>");
257 buf
258}
259
260fn inflate_bytes_zlib(data: &[u8]) -> Result<Vec<u8>> {
261 use libflate::zlib::Decoder;
262 let mut decoder = Decoder::new(data)?;
263 let mut decoded = Vec::new();
264 decoder.read_to_end(&mut decoded)?;
265 Ok(decoded)
266}
267
268fn inflate_bytes(data: &[u8]) -> Result<Vec<u8>> {
269 use libflate::deflate::Decoder;
270 let mut decoder = Decoder::new(data);
271 let mut decoded = Vec::new();
272 decoder.read_to_end(&mut decoded)?;
273 Ok(decoded)
274}
275
276pub fn flate_decode(data: &[u8], params: &LZWFlateParams) -> Result<Vec<u8>> {
277
278 let predictor = params.predictor as usize;
279 let n_components = params.n_components as usize;
280 let columns = params.columns as usize;
281 let stride = columns * n_components;
282
283
284 let decoded = {
286 if let Ok(data) = inflate_bytes_zlib(data) {
287 data
288 } else if let Ok(data) = inflate_bytes(data) {
289 data
290 } else {
291 dump_data(data);
292 bail!("can't inflate");
293 }
294 };
295 if predictor > 10 {
299 let inp = decoded; let rows = inp.len() / (stride+1);
301
302 let mut out = vec![0; rows * stride];
304
305 let null_vec = vec![0; stride];
307
308 let mut in_off = 0; let mut out_off = 0; let mut last_out_off = 0; while in_off + stride < inp.len() {
314 let predictor = PredictorType::from_u8(inp[in_off])?;
315 in_off += 1; let row_in = &inp[in_off .. in_off + stride];
318 let (prev_row, row_out) = if out_off == 0 {
319 (&null_vec[..], &mut out[out_off .. out_off+stride])
320 } else {
321 let (prev, curr) = out.split_at_mut(out_off);
322 (&prev[last_out_off ..], &mut curr[.. stride])
323 };
324 unfilter(predictor, n_components, prev_row, row_in, row_out);
325
326 last_out_off = out_off;
327
328 in_off += stride;
329 out_off += stride;
330 }
331 Ok(out)
332 } else {
333 Ok(decoded)
334 }
335}
336fn flate_encode(data: &[u8]) -> Vec<u8> {
337 use libflate::deflate::Encoder;
338 let mut encoded = Vec::new();
339 let mut encoder = Encoder::new(&mut encoded);
340 encoder.write_all(data).unwrap();
341 encoded
342}
343
344pub fn dct_decode(data: &[u8], _params: &DCTDecodeParams) -> Result<Vec<u8>> {
345 use jpeg_decoder::Decoder;
346 let mut decoder = Decoder::new(data);
347 let pixels = decoder.decode()?;
348 Ok(pixels)
349}
350
351pub fn lzw_decode(data: &[u8], params: &LZWFlateParams) -> Result<Vec<u8>> {
352 use weezl::{BitOrder, decode::Decoder};
353 let mut out = vec![];
354
355 let mut decoder = if params.early_change != 0 {
356 Decoder::with_tiff_size_switch(BitOrder::Msb, 9)
357 } else {
358 Decoder::new(BitOrder::Msb, 9)
359 };
360
361 decoder
362 .into_stream(&mut out)
363 .decode_all(data).status?;
364 Ok(out)
365}
366fn lzw_encode(data: &[u8], params: &LZWFlateParams) -> Result<Vec<u8>> {
367 use weezl::{BitOrder, encode::Encoder};
368 if params.early_change != 0 {
369 bail!("encoding early_change != 0 is not supported");
370 }
371 let mut compressed = vec![];
372 Encoder::new(BitOrder::Msb, 9)
373 .into_stream(&mut compressed)
374 .encode_all(data).status?;
375 Ok(compressed)
376}
377
378pub fn fax_decode(data: &[u8], params: &CCITTFaxDecodeParams) -> Result<Vec<u8>> {
379 use fax::{Color, decoder::{pels, decode_g4}};
380
381 if params.k < 0 {
382 let columns = params.columns as usize;
383 let rows = params.rows as usize;
384
385 let height = if params.rows == 0 { None } else { Some(params.rows as u16)};
386 let mut buf = Vec::with_capacity(columns * rows);
387 decode_g4(data.iter().cloned(), columns as u16, height, |line| {
388 buf.extend(pels(line, columns as u16).map(|c| match c {
389 Color::Black => 0,
390 Color::White => 255
391 }));
392 assert_eq!(buf.len() % columns, 0, "len={}, columns={}", buf.len(), columns);
393 }).ok_or(PdfError::Other { msg: "faxdecode failed".into() })?;
394 assert_eq!(buf.len() % columns, 0, "len={}, columns={}", buf.len(), columns);
395
396 if rows != 0 && buf.len() != columns * rows {
397 bail!("decoded length does not match (expected {rows}∙{columns}, got {})", buf.len());
398 }
399 Ok(buf)
400 } else {
401 unimplemented!()
402 }
403}
404
405pub fn run_length_decode(data: &[u8]) -> Result<Vec<u8>> {
406 let mut buf = Vec::new();
408 let d = data;
409 let mut c = 0;
410
411 while c < data.len() {
412 let length = d[c]; if length < 128 {
414 let start = c + 1;
415 let end = start + length as usize + 1;
416 buf.extend_from_slice(&d[start..end]);
418 c = end; } else if length >= 129 {
420 let copy = 257 - length as usize; let b = d[c + 1]; buf.extend(std::iter::repeat(b).take(copy));
423 c += 2; } else {
425 break; }
427 }
428
429 Ok(buf)
430}
431
432pub type DecodeFn = dyn Fn(&[u8]) -> Result<Vec<u8>> + Sync + Send + 'static;
433static JPX_DECODER: OnceCell<Box<DecodeFn>> = OnceCell::new();
434static JBIG2_DECODER: OnceCell<Box<DecodeFn>> = OnceCell::new();
435
436pub fn set_jpx_decoder(f: Box<DecodeFn>) {
437 let _ = JPX_DECODER.set(f);
438}
439pub fn set_jbig2_decoder(f: Box<DecodeFn>) {
440 let _ = JBIG2_DECODER.set(f);
441}
442
443pub fn jpx_decode(data: &[u8]) -> Result<Vec<u8>> {
444 JPX_DECODER.get().ok_or_else(|| PdfError::Other { msg: "jp2k decoder not set".into()})?(data)
445}
446pub fn jbig2_decode(data: &[u8], globals: &[u8]) -> Result<Vec<u8>> {
447 let data = [
448 globals,
452 data,
453
454 &[0x00, 0x00, 0x00, 0x03, 0x31, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00],
456
457 &[0x00, 0x00, 0x00, 0x04, 0x33, 0x01, 0x00, 0x00, 0x00, 0x00],
459 ].concat();
460 JBIG2_DECODER.get().ok_or_else(|| PdfError::Other { msg: "jbig2 decoder not set".into()})?(&data)
461}
462
463pub fn decode(data: &[u8], filter: &StreamFilter) -> Result<Vec<u8>> {
464 match *filter {
465 StreamFilter::ASCIIHexDecode => decode_hex(data),
466 StreamFilter::ASCII85Decode => decode_85(data),
467 StreamFilter::LZWDecode(ref params) => lzw_decode(data, params),
468 StreamFilter::FlateDecode(ref params) => flate_decode(data, params),
469 StreamFilter::RunLengthDecode => run_length_decode(data),
470 StreamFilter::DCTDecode(ref params) => dct_decode(data, params),
471
472 _ => bail!("unimplemented {filter:?}"),
473 }
474}
475
476pub fn encode(data: &[u8], filter: &StreamFilter) -> Result<Vec<u8>> {
477 match *filter {
478 StreamFilter::ASCIIHexDecode => Ok(encode_hex(data)),
479 StreamFilter::ASCII85Decode => Ok(encode_85(data)),
480 StreamFilter::LZWDecode(ref params) => lzw_encode(data, params),
481 StreamFilter::FlateDecode (ref _params) => Ok(flate_encode(data)),
482 _ => unimplemented!(),
483 }
484}
485
486#[derive(Debug, Clone, Copy, PartialEq, Eq)]
491#[repr(u8)]
492#[allow(dead_code)]
493pub enum PredictorType {
494 NoFilter = 0,
495 Sub = 1,
496 Up = 2,
497 Avg = 3,
498 Paeth = 4
499}
500
501impl PredictorType {
502 pub fn from_u8(n: u8) -> Result<PredictorType> {
504 match n {
505 0 => Ok(PredictorType::NoFilter),
506 1 => Ok(PredictorType::Sub),
507 2 => Ok(PredictorType::Up),
508 3 => Ok(PredictorType::Avg),
509 4 => Ok(PredictorType::Paeth),
510 n => Err(PdfError::IncorrectPredictorType {n})
511 }
512 }
513}
514
/// Paeth predictor: returns whichever of `a`, `b`, `c` is closest to
/// `a + b - c`, preferring `a`, then `b`, then `c` on ties.
fn filter_paeth(a: u8, b: u8, c: u8) -> u8 {
    // Signed 16-bit arithmetic keeps the estimate from wrapping.
    let estimate = i16::from(a) + i16::from(b) - i16::from(c);
    let distance = |sample: u8| (estimate - i16::from(sample)).abs();
    let (dist_a, dist_b, dist_c) = (distance(a), distance(b), distance(c));

    if dist_a > dist_b || dist_a > dist_c {
        if dist_b > dist_c { c } else { b }
    } else {
        a
    }
}
534
535pub fn unfilter(filter: PredictorType, bpp: usize, prev: &[u8], inp: &[u8], out: &mut [u8]) {
536 use self::PredictorType::*;
537 let len = inp.len();
538 assert_eq!(len, out.len());
539 assert_eq!(len, prev.len());
540 if bpp > len {
541 return;
542 }
543
544 match filter {
545 NoFilter => {
546 out[..len].copy_from_slice(&inp[..len]);
547 }
548 Sub => {
549 out[..bpp].copy_from_slice(&inp[..bpp]);
550
551 for i in bpp..len {
552 out[i] = inp[i].wrapping_add(out[i - bpp]);
553 }
554 }
555 Up => {
556 for i in 0..len {
557 out[i] = inp[i].wrapping_add(prev[i]);
558 }
559 }
560 Avg => {
561 for i in 0..bpp {
562 out[i] = inp[i].wrapping_add(prev[i] / 2);
563 }
564
565 for i in bpp..len {
566 out[i] = inp[i].wrapping_add(
567 ((out[i - bpp] as i16 + prev[i] as i16) / 2) as u8
568 );
569 }
570 }
571 Paeth => {
572 for i in 0..bpp {
573 out[i] = inp[i].wrapping_add(
574 filter_paeth(0, prev[i], 0)
575 );
576 }
577
578 for i in bpp..len {
579 out[i] = inp[i].wrapping_add(
580 filter_paeth(out[i - bpp], prev[i], prev[i - bpp])
581 );
582 }
583 }
584 }
585}
586
587#[allow(unused)]
588pub fn filter(method: PredictorType, bpp: usize, previous: &[u8], current: &mut [u8]) {
589 use self::PredictorType::*;
590 let len = current.len();
591
592 match method {
593 NoFilter => (),
594 Sub => {
595 for i in (bpp..len).rev() {
596 current[i] = current[i].wrapping_sub(current[i - bpp]);
597 }
598 }
599 Up => {
600 for i in 0..len {
601 current[i] = current[i].wrapping_sub(previous[i]);
602 }
603 }
604 Avg => {
605 for i in (bpp..len).rev() {
606 current[i] = current[i].wrapping_sub(current[i - bpp].wrapping_add(previous[i]) / 2);
607 }
608
609 for i in 0..bpp {
610 current[i] = current[i].wrapping_sub(previous[i] / 2);
611 }
612 }
613 Paeth => {
614 for i in (bpp..len).rev() {
615 current[i] = current[i].wrapping_sub(filter_paeth(current[i - bpp], previous[i], previous[i - bpp]));
616 }
617
618 for i in 0..bpp {
619 current[i] = current[i].wrapping_sub(filter_paeth(0, previous[i], 0));
620 }
621 }
622 }
623}
624
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes a short ASCII string as ASCII85, checks the exact encoding,
    /// and checks that decoding returns the original bytes.
    #[test]
    fn base_85() {
        let case: &[u8] = b"hello world!";

        let encoded = encode_85(case);
        let encoded_text = std::str::from_utf8(&encoded).unwrap();
        assert_eq!(encoded_text, "BOu!rD]j7BEbo80~>");

        let decoded = decode_85(&encoded).unwrap();
        assert_eq!(case, &*decoded);
    }

    /// Mixes repeat runs and a literal run, ending with the end-of-data marker.
    #[test]
    fn run_length_decode_test() {
        let encoded = [254, b'a', 255, b'b', 2, b'c', b'b', b'c', 254, b'a', 128];
        let x = run_length_decode(&encoded).unwrap();
        assert_eq!(b"aaabbcbcaaa", x.as_slice());
    }
}