pdf/
enc.rs

1#![allow(clippy::many_single_char_names)]
2#![allow(dead_code)]  // TODO
3
4use itertools::Itertools;
5
6use crate as pdf;
7use crate::error::*;
8use crate::object::{Object, Resolve, Stream};
9use crate::primitive::{Primitive, Dictionary};
10use std::convert::{TryFrom, TryInto};
11use std::io::{Read, Write};
12use once_cell::sync::OnceCell;
13use datasize::DataSize;
14
15
/// Decode parameters shared by the `LZWDecode` and `FlateDecode` filters.
///
/// Each field is read from the dictionary key named in its `#[pdf(key = ...)]`
/// attribute and takes the listed default when the key is absent.
#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
pub struct LZWFlateParams {
    // `Predictor` key: selects the predictor applied to the decoded data (default 1).
    #[pdf(key="Predictor", default="1")]
    pub predictor: i32,
    // `Colors` key: number of colour components per sample (default 1).
    #[pdf(key="Colors", default="1")]
    pub n_components: i32,
    // `BitsPerComponent` key (default 8).
    #[pdf(key="BitsPerComponent", default="8")]
    pub bits_per_component: i32,
    // `Columns` key: samples per row (default 1).
    #[pdf(key="Columns", default="1")]
    pub columns: i32,
    // `EarlyChange` key (default 1); `lzw_decode` switches the decoder variant on
    // whether this is zero.
    #[pdf(key="EarlyChange", default="1")]
    pub early_change: i32,
}
29impl Default for LZWFlateParams {
30    fn default() -> LZWFlateParams {
31        LZWFlateParams {
32            predictor: 1,
33            n_components: 1,
34            bits_per_component: 8,
35            columns: 1,
36            early_change: 1
37        }
38    }
39}
40
/// Parameters of the `DCTDecode` (JPEG) filter.
#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
pub struct DCTDecodeParams {
    // TODO The default value of ColorTransform is 1 if the image has three components and 0 otherwise.
    // 0:   No transformation.
    // 1:   If the image has three color components, transform RGB values to YUV before encoding and from YUV to RGB after decoding.
    //      If the image has four components, transform CMYK values to YUVK before encoding and from YUVK to CMYK after decoding.
    //      This option is ignored if the image has one or two color components.
    // `None` when the `ColorTransform` key is absent; `dct_decode` does not read this value.
    #[pdf(key="ColorTransform")]
    pub color_transform: Option<i32>,
}
51
/// Parameters of the `CCITTFaxDecode` filter.
///
/// Each field is read from the dictionary key named in its `#[pdf(key = ...)]`
/// attribute and takes the listed default when the key is absent. Of these,
/// `fax_decode` reads only `k`, `columns` and `rows`.
#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
pub struct CCITTFaxDecodeParams {
    // `K` key: encoding scheme selector; only negative values are decoded by `fax_decode`.
    #[pdf(key="K", default="0")]
    pub k: i32,

    // `EndOfLine` key (default false).
    #[pdf(key="EndOfLine", default="false")]
    pub end_of_line: bool,

    // `EncodedByteAlign` key (default false).
    #[pdf(key="EncodedByteAlign", default="false")]
    pub encoded_byte_align: bool,

    // `Columns` key: image width in pixels (default 1728).
    #[pdf(key="Columns", default="1728")]
    pub columns: u32,

    // `Rows` key: image height in pixels; 0 (the default) means the height is not given.
    #[pdf(key="Rows", default="0")]
    pub rows: u32,

    // `EndOfBlock` key (default true).
    #[pdf(key="EndOfBlock", default="true")]
    pub end_of_block: bool,

    // `BlackIs1` key (default false).
    #[pdf(key="BlackIs1", default="false")]
    pub black_is_1: bool,

    // `DamagedRowsBeforeError` key (default 0).
    #[pdf(key="DamagedRowsBeforeError", default="0")]
    pub damaged_rows_before_error: u32,
}
78
/// Parameters of the `JBIG2Decode` filter.
#[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)]
pub struct JBIG2DecodeParams {
    // Optional stream from the `JBIG2Globals` key; `None` when the key is absent.
    // NOTE(review): presumably this holds the global segments that `jbig2_decode`
    // expects as its `globals` argument - confirm against the caller.
    #[pdf(key="JBIG2Globals")]
    pub globals: Option<Stream<()>>
}
/// A stream filter together with its parsed parameters, if the filter has any.
///
/// Variants are named after the filter names matched in
/// `StreamFilter::from_kind_and_params`.
#[derive(Debug, Clone, DataSize, DeepClone)]
pub enum StreamFilter {
    ASCIIHexDecode,
    ASCII85Decode,
    // LZW and Flate share one parameter struct.
    LZWDecode (LZWFlateParams),
    FlateDecode (LZWFlateParams),
    JPXDecode, //Jpeg2k
    DCTDecode (DCTDecodeParams),
    CCITTFaxDecode (CCITTFaxDecodeParams),
    JBIG2Decode(JBIG2DecodeParams),
    Crypt,
    RunLengthDecode
}
97impl StreamFilter {
98    pub fn from_kind_and_params(kind: &str, params: Dictionary, r: &impl Resolve) -> Result<StreamFilter> {
99       let params = Primitive::Dictionary (params);
100       Ok(
101       match kind {
102           "ASCIIHexDecode" => StreamFilter::ASCIIHexDecode,
103           "ASCII85Decode" => StreamFilter::ASCII85Decode,
104           "LZWDecode" => StreamFilter::LZWDecode (LZWFlateParams::from_primitive(params, r)?),
105           "FlateDecode" => StreamFilter::FlateDecode (LZWFlateParams::from_primitive(params, r)?),
106           "JPXDecode" => StreamFilter::JPXDecode,
107           "DCTDecode" => StreamFilter::DCTDecode (DCTDecodeParams::from_primitive(params, r)?),
108           "CCITTFaxDecode" => StreamFilter::CCITTFaxDecode (CCITTFaxDecodeParams::from_primitive(params, r)?),
109           "JBIG2Decode" => StreamFilter::JBIG2Decode(JBIG2DecodeParams::from_primitive(params, r)?),
110           "Crypt" => StreamFilter::Crypt,
111           "RunLengthDecode" => StreamFilter::RunLengthDecode,
112           ty => bail!("Unrecognized filter type {:?}", ty),
113       } 
114       )
115    }
116}
117
/// Decodes one ASCII hexadecimal digit into its value (0..=15).
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F`; every other byte yields `None`.
/// The letter ranges must stop at `f`/`F`: a wider range would return values
/// above 15, which no longer fit in a nibble and corrupt the combined byte.
#[inline]
pub fn decode_nibble(c: u8) -> Option<u8> {
    match c {
        b'0'..=b'9' => Some(c - b'0'),
        b'a'..=b'f' => Some(c - b'a' + 0xa),
        b'A'..=b'F' => Some(c - b'A' + 0xa),
        _ => None,
    }
}
127
/// Encodes a value in `0..=15` as a lowercase ASCII hexadecimal digit.
///
/// # Panics
/// Panics if `c` is greater than 15.
#[inline]
fn encode_nibble(c: u8) -> u8 {
    if c < 10 {
        b'0' + c
    } else if c < 16 {
        b'a' + (c - 10)
    } else {
        unreachable!()
    }
}
136
137
138pub fn decode_hex(data: &[u8]) -> Result<Vec<u8>> {
139    let mut out = Vec::with_capacity(data.len() / 2);
140    let pairs = data.iter().cloned()
141        .take_while(|&b| b != b'>')
142        .filter(|&b| !matches!(b, 0 | 9 | 10 | 12 | 13 | 32))
143        .tuples();
144    for (i, (high, low)) in pairs.enumerate() {
145        if let (Some(low), Some(high)) = (decode_nibble(low), decode_nibble(high)) {
146            out.push(high << 4 | low);
147        } else {
148            return Err(PdfError::HexDecode {pos: i * 2, bytes: [high, low]})
149        }
150    }
151    Ok(out)
152}
153pub fn encode_hex(data: &[u8]) -> Vec<u8> {
154    let mut buf = Vec::with_capacity(data.len() * 2);
155    for &b in data {
156        buf.push(encode_nibble(b >> 4));
157        buf.push(encode_nibble(b & 0xf));
158    }
159    buf
160}
161
/// Maps an ASCII85 symbol (`!` = 0x21 through `u` = 0x75) to its digit value
/// in `0..=84`; any other byte yields `None`.
#[inline]
fn sym_85(byte: u8) -> Option<u8> {
    if (0x21..=0x75).contains(&byte) {
        Some(byte - 0x21)
    } else {
        None
    }
}
169
170fn word_85([a, b, c, d, e]: [u8; 5]) -> Option<[u8; 4]> {
171    fn s(b: u8) -> Option<u64> { sym_85(b).map(|n| n as u64) }
172    let (a, b, c, d, e) = (s(a)?, s(b)?, s(c)?, s(d)?, s(e)?);
173    let q = (((a * 85 + b) * 85 + c) * 85 + d) * 85 + e;
174    // 85^5 > 256^4, the result might not fit in an u32.
175    let r = u32::try_from(q).ok()?;
176    Some(r.to_be_bytes())
177}
178
179pub fn decode_85(data: &[u8]) -> Result<Vec<u8>> {
180    let mut out = Vec::with_capacity((data.len() + 4) / 5 * 4);
181    
182    let mut stream = data.iter().cloned()
183        .filter(|&b| !matches!(b, b' ' | b'\n' | b'\r' | b'\t'));
184
185    let mut symbols = stream.by_ref()
186        .take_while(|&b| b != b'~');
187
188    let (tail_len, tail) = loop {
189        match symbols.next() {
190            Some(b'z') => out.extend_from_slice(&[0; 4]),
191            Some(a) => {
192                let (b, c, d, e) = match (symbols.next(), symbols.next(), symbols.next(), symbols.next()) {
193                    (Some(b), Some(c), Some(d), Some(e)) => (b, c, d, e),
194                    (None, _, _, _) => break (1, [a, b'u', b'u', b'u', b'u']),
195                    (Some(b), None, _, _) => break (2, [a, b, b'u', b'u', b'u']),
196                    (Some(b), Some(c), None, _) => break (3, [a, b, c, b'u', b'u']),
197                    (Some(b), Some(c), Some(d), None) => break (4, [a, b, c, d, b'u']),
198                };
199                out.extend_from_slice(&word_85([a, b, c, d, e]).ok_or(PdfError::Ascii85TailError)?);
200            }
201            None => break (0, [b'u'; 5])
202        }
203    };
204
205    if tail_len > 0 {
206        let last = word_85(tail).ok_or(PdfError::Ascii85TailError)?;
207        out.extend_from_slice(&last[.. tail_len-1]);
208    }
209
210    match (stream.next(), stream.next()) {
211        (Some(b'>'), None) => Ok(out),
212        _ => Err(PdfError::Ascii85TailError)
213    }
214}
215
/// Returns `(n / m, n % m)`.
///
/// # Panics
/// Panics if `m` is zero.
#[inline]
fn divmod(n: u32, m: u32) -> (u32, u32) {
    let quotient = n / m;
    // quotient * m <= n, so neither the product nor the difference can overflow.
    (quotient, n - quotient * m)
}
220
/// Converts a base-85 digit value to its ASCII85 symbol by offsetting it from `!`.
#[inline]
fn a85(n: u32) -> u8 {
    b'!' + n as u8
}
225
226#[inline]
227fn base85_chunk(c: [u8; 4]) -> [u8; 5] {
228    let n = u32::from_be_bytes(c);
229    let (n, e) = divmod(n, 85);
230    let (n, d) = divmod(n, 85);
231    let (n, c) = divmod(n, 85);
232    let (a, b) = divmod(n, 85);
233    
234    [a85(a), a85(b), a85(c), a85(d), a85(e)]
235}
236
237fn encode_85(data: &[u8]) -> Vec<u8> {
238    let mut buf = Vec::with_capacity((data.len() / 4) * 5 + 10);
239    let mut chunks = data.chunks_exact(4);
240    for chunk in chunks.by_ref() {
241        let c: [u8; 4] = chunk.try_into().unwrap();
242        if c == [0; 4] {
243            buf.push(b'z');
244        } else {
245            buf.extend_from_slice(&base85_chunk(c));
246        }
247    }
248
249    let r = chunks.remainder();
250    if r.len() > 0 {
251        let mut c = [0; 4];
252        c[.. r.len()].copy_from_slice(r);
253        let out = base85_chunk(c);
254        buf.extend_from_slice(&out[.. r.len() + 1]);
255    }
256    buf.extend_from_slice(b"~>");
257    buf
258}
259
260fn inflate_bytes_zlib(data: &[u8]) -> Result<Vec<u8>> {
261    use libflate::zlib::Decoder;
262    let mut decoder = Decoder::new(data)?;
263    let mut decoded = Vec::new();
264    decoder.read_to_end(&mut decoded)?;
265    Ok(decoded)
266}
267
268fn inflate_bytes(data: &[u8]) -> Result<Vec<u8>> {
269    use libflate::deflate::Decoder;
270    let mut decoder = Decoder::new(data);
271    let mut decoded = Vec::new();
272    decoder.read_to_end(&mut decoded)?;
273    Ok(decoded)
274}
275
276pub fn flate_decode(data: &[u8], params: &LZWFlateParams) -> Result<Vec<u8>> {
277
278    let predictor = params.predictor as usize;
279    let n_components = params.n_components as usize;
280    let columns = params.columns as usize;
281    let stride = columns * n_components;
282
283
284    // First flate decode
285    let decoded = {
286        if let Ok(data) = inflate_bytes_zlib(data) {
287            data
288        } else if let Ok(data) = inflate_bytes(data) {
289            data
290        } else {
291            dump_data(data);
292            bail!("can't inflate");
293        }
294    };
295    // Then unfilter (PNG)
296    // For this, take the old out as input, and write output to out
297
298    if predictor > 10 {
299        let inp = decoded; // input buffer
300        let rows = inp.len() / (stride+1);
301        
302        // output buffer
303        let mut out = vec![0; rows * stride];
304    
305        // Apply inverse predictor
306        let null_vec = vec![0; stride];
307        
308        let mut in_off = 0; // offset into input buffer
309        
310        let mut out_off = 0; // offset into output buffer
311        let mut last_out_off = 0; // last offset to output buffer
312        
313        while in_off + stride < inp.len() {
314            let predictor = PredictorType::from_u8(inp[in_off])?;
315            in_off += 1; // +1 because the first byte on each row is predictor
316            
317            let row_in = &inp[in_off .. in_off + stride];
318            let (prev_row, row_out) = if out_off == 0 {
319                (&null_vec[..], &mut out[out_off .. out_off+stride])
320            } else {
321                let (prev, curr) = out.split_at_mut(out_off);
322                (&prev[last_out_off ..], &mut curr[.. stride])
323            };
324            unfilter(predictor, n_components, prev_row, row_in, row_out);
325            
326            last_out_off = out_off;
327            
328            in_off += stride;
329            out_off += stride;
330        }
331        Ok(out)
332    } else {
333        Ok(decoded)
334    }
335}
336fn flate_encode(data: &[u8]) -> Vec<u8> {
337    use libflate::deflate::Encoder;
338    let mut encoded = Vec::new();
339    let mut encoder = Encoder::new(&mut encoded);
340    encoder.write_all(data).unwrap();
341    encoded
342}
343
344pub fn dct_decode(data: &[u8], _params: &DCTDecodeParams) -> Result<Vec<u8>> {
345    use jpeg_decoder::Decoder;
346    let mut decoder = Decoder::new(data);
347    let pixels = decoder.decode()?;
348    Ok(pixels)
349}
350
351pub fn lzw_decode(data: &[u8], params: &LZWFlateParams) -> Result<Vec<u8>> {
352    use weezl::{BitOrder, decode::Decoder};
353    let mut out = vec![];
354
355    let mut decoder = if params.early_change != 0 {
356        Decoder::with_tiff_size_switch(BitOrder::Msb, 9)
357    } else {
358        Decoder::new(BitOrder::Msb, 9)
359    };
360
361    decoder
362        .into_stream(&mut out)
363        .decode_all(data).status?;
364    Ok(out)
365}
366fn lzw_encode(data: &[u8], params: &LZWFlateParams) -> Result<Vec<u8>> {
367    use weezl::{BitOrder, encode::Encoder};
368    if params.early_change != 0 {
369        bail!("encoding early_change != 0 is not supported");
370    }
371    let mut compressed = vec![];
372    Encoder::new(BitOrder::Msb, 9)
373        .into_stream(&mut compressed)
374        .encode_all(data).status?;
375    Ok(compressed)
376}
377
378pub fn fax_decode(data: &[u8], params: &CCITTFaxDecodeParams) -> Result<Vec<u8>> {
379    use fax::{Color, decoder::{pels, decode_g4}};
380
381    if params.k < 0 {
382        let columns = params.columns as usize;
383        let rows = params.rows as usize;
384
385        let height = if params.rows == 0 { None } else { Some(params.rows as u16)};
386        let mut buf = Vec::with_capacity(columns * rows);
387        decode_g4(data.iter().cloned(), columns as u16, height, |line| {
388            buf.extend(pels(line, columns as u16).map(|c| match c {
389                Color::Black => 0,
390                Color::White => 255
391            }));
392            assert_eq!(buf.len() % columns, 0, "len={}, columns={}", buf.len(), columns);
393        }).ok_or(PdfError::Other { msg: "faxdecode failed".into() })?;
394        assert_eq!(buf.len() % columns, 0, "len={}, columns={}", buf.len(), columns);
395
396        if rows != 0 && buf.len() != columns * rows {
397            bail!("decoded length does not match (expected {rows}∙{columns}, got {})", buf.len());
398        }
399        Ok(buf)
400    } else {
401        unimplemented!()
402    }
403}
404
405pub fn run_length_decode(data: &[u8]) -> Result<Vec<u8>> {
406    // Used <http://benno.id.au/refs/PDFReference15_v5.pdf> as specification
407    let mut buf = Vec::new();
408    let d = data;
409    let mut c = 0;
410
411    while c < data.len() {
412        let length = d[c]; // length is first byte
413        if length < 128 {
414            let start = c + 1;
415            let end = start + length as usize + 1;
416            // copy _following_ length + 1 bytes literally
417            buf.extend_from_slice(&d[start..end]);
418            c = end; // move cursor to next run
419        } else if length >= 129 {
420            let copy = 257 - length as usize; // copy 2 - 128 times
421            let b = d[c + 1]; // copied byte
422            buf.extend(std::iter::repeat(b).take(copy));
423            c += 2; // move cursor to next run
424        } else {
425            break; // EOD
426        }
427    }
428
429    Ok(buf)
430}
431
/// Signature of an externally supplied decoder: takes encoded bytes and
/// returns the decoded bytes or an error.
pub type DecodeFn = dyn Fn(&[u8]) -> Result<Vec<u8>> + Sync + Send + 'static;
// Slots for the decoders registered through `set_jpx_decoder` and
// `set_jbig2_decoder`; `jpx_decode` / `jbig2_decode` fail while they are unset.
static JPX_DECODER: OnceCell<Box<DecodeFn>> = OnceCell::new();
static JBIG2_DECODER: OnceCell<Box<DecodeFn>> = OnceCell::new();
435
436pub fn set_jpx_decoder(f: Box<DecodeFn>) {
437    let _ = JPX_DECODER.set(f);
438}
439pub fn set_jbig2_decoder(f: Box<DecodeFn>) {
440    let _ = JBIG2_DECODER.set(f);
441}
442
443pub fn jpx_decode(data: &[u8]) -> Result<Vec<u8>> {
444    JPX_DECODER.get().ok_or_else(|| PdfError::Other { msg: "jp2k decoder not set".into()})?(data)
445}
446pub fn jbig2_decode(data: &[u8], globals: &[u8]) -> Result<Vec<u8>> {
447    let data = [
448        // file header
449        // &[0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x01],
450
451        globals,
452        data,
453
454        // end of page
455        &[0x00, 0x00, 0x00, 0x03, 0x31, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00],
456
457        // end of stream
458        &[0x00, 0x00, 0x00, 0x04, 0x33, 0x01, 0x00, 0x00, 0x00, 0x00],
459    ].concat();
460    JBIG2_DECODER.get().ok_or_else(|| PdfError::Other { msg: "jbig2 decoder not set".into()})?(&data)
461}
462
463pub fn decode(data: &[u8], filter: &StreamFilter) -> Result<Vec<u8>> {
464    match *filter {
465        StreamFilter::ASCIIHexDecode => decode_hex(data),
466        StreamFilter::ASCII85Decode => decode_85(data),
467        StreamFilter::LZWDecode(ref params) => lzw_decode(data, params),
468        StreamFilter::FlateDecode(ref params) => flate_decode(data, params),
469        StreamFilter::RunLengthDecode => run_length_decode(data),
470        StreamFilter::DCTDecode(ref params) => dct_decode(data, params),
471
472        _ => bail!("unimplemented {filter:?}"),
473    }
474}
475
476pub fn encode(data: &[u8], filter: &StreamFilter) -> Result<Vec<u8>> {
477    match *filter {
478        StreamFilter::ASCIIHexDecode => Ok(encode_hex(data)),
479        StreamFilter::ASCII85Decode => Ok(encode_85(data)),
480        StreamFilter::LZWDecode(ref params) => lzw_encode(data, params),
481        StreamFilter::FlateDecode (ref _params) => Ok(flate_encode(data)),
482        _ => unimplemented!(),
483    }
484}
485
486/*
487 * Predictor - copied and adapted from PNG crate..
488 */
489
/// Per-row PNG filter type. The discriminants are the tag byte values
/// accepted by `PredictorType::from_u8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
#[allow(dead_code)]
pub enum PredictorType {
    // Row is stored unchanged.
    NoFilter = 0,
    // Predicts from the byte `bpp` positions to the left.
    Sub = 1,
    // Predicts from the byte directly above.
    Up = 2,
    // Predicts from the mean of the left and above bytes.
    Avg = 3,
    // Predicts with the Paeth function of the left, above and upper-left bytes.
    Paeth = 4
}
500
501impl PredictorType {  
502    /// u8 -> Self. Temporary solution until Rust provides a canonical one.
503    pub fn from_u8(n: u8) -> Result<PredictorType> {
504        match n {
505            0 => Ok(PredictorType::NoFilter),
506            1 => Ok(PredictorType::Sub),
507            2 => Ok(PredictorType::Up),
508            3 => Ok(PredictorType::Avg),
509            4 => Ok(PredictorType::Paeth),
510            n => Err(PdfError::IncorrectPredictorType {n})
511        }
512    }
513}
514
/// Paeth predictor: returns whichever of `a` (left), `b` (above) and `c`
/// (upper left) is closest to `a + b - c`, preferring `a`, then `b`, on ties.
fn filter_paeth(a: u8, b: u8, c: u8) -> u8 {
    // Work in i16 so the estimate and the distances cannot overflow.
    let (left, above, upper_left) = (i16::from(a), i16::from(b), i16::from(c));
    let estimate = left + above - upper_left;

    let dist_left = (estimate - left).abs();
    let dist_above = (estimate - above).abs();
    let dist_upper_left = (estimate - upper_left).abs();

    if dist_left <= dist_above && dist_left <= dist_upper_left {
        return a;
    }
    if dist_above <= dist_upper_left {
        return b;
    }
    c
}
534
535pub fn unfilter(filter: PredictorType, bpp: usize, prev: &[u8], inp: &[u8], out: &mut [u8]) {
536    use self::PredictorType::*;
537    let len = inp.len();
538    assert_eq!(len, out.len());
539    assert_eq!(len, prev.len());
540    if bpp > len {
541        return;
542    }
543
544    match filter {
545        NoFilter => {
546            out[..len].copy_from_slice(&inp[..len]);
547        }
548        Sub => {
549            out[..bpp].copy_from_slice(&inp[..bpp]);
550
551            for i in bpp..len {
552                out[i] = inp[i].wrapping_add(out[i - bpp]);
553            }
554        }
555        Up => {
556            for i in 0..len {
557                out[i] = inp[i].wrapping_add(prev[i]);
558            }
559        }
560        Avg => {
561            for i in 0..bpp {
562                out[i] = inp[i].wrapping_add(prev[i] / 2);
563            }
564
565            for i in bpp..len {
566                out[i] = inp[i].wrapping_add(
567                    ((out[i - bpp] as i16 + prev[i] as i16) / 2) as u8
568                );
569            }
570        }
571        Paeth => {
572            for i in 0..bpp {
573                out[i] = inp[i].wrapping_add(
574                    filter_paeth(0, prev[i], 0)
575                );
576            }
577
578            for i in bpp..len {
579                out[i] = inp[i].wrapping_add(
580                    filter_paeth(out[i - bpp], prev[i], prev[i - bpp])
581                );
582            }
583        }
584    }
585}
586
587#[allow(unused)]
588pub fn filter(method: PredictorType, bpp: usize, previous: &[u8], current: &mut [u8]) {
589    use self::PredictorType::*;
590    let len  = current.len();
591
592    match method {
593        NoFilter => (),
594        Sub => {
595            for i in (bpp..len).rev() {
596                current[i] = current[i].wrapping_sub(current[i - bpp]);
597            }
598        }
599        Up => {
600            for i in 0..len {
601                current[i] = current[i].wrapping_sub(previous[i]);
602            }
603        }
604        Avg => {
605            for i in (bpp..len).rev() {
606                current[i] = current[i].wrapping_sub(current[i - bpp].wrapping_add(previous[i]) / 2);
607            }
608
609            for i in 0..bpp {
610                current[i] = current[i].wrapping_sub(previous[i] / 2);
611            }
612        }
613        Paeth => {
614            for i in (bpp..len).rev() {
615                current[i] = current[i].wrapping_sub(filter_paeth(current[i - bpp], previous[i], previous[i - bpp]));
616            }
617
618            for i in 0..bpp {
619                current[i] = current[i].wrapping_sub(filter_paeth(0, previous[i], 0));
620            }
621        }
622    }
623}
624
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes a short text as ASCII85, checks the exact encoding, and checks
    /// that decoding returns the original bytes.
    #[test]
    fn base_85() {
        fn s(b: &[u8]) -> &str { std::str::from_utf8(b).unwrap() }

        let plain: &[u8] = b"hello world!";
        let armored = encode_85(plain);
        assert_eq!(s(&armored), "BOu!rD]j7BEbo80~>");

        let round_trip = decode_85(&armored).unwrap();
        assert_eq!(plain, round_trip.as_slice());
        /*
        assert_eq!(
            s(&decode_85(
                &lzw_decode(
                    &decode_85(&include_bytes!("data/t01_lzw+base85.txt")[..]).unwrap(),
                    &LZWFlateParams::default()
                ).unwrap()
            ).unwrap()),
            include_str!("data/t01_plain.txt")
        );
        */
    }

    /// Mixes repeat runs (254, 255), a literal run (2) and the EOD marker (128).
    #[test]
    fn run_length_decode_test() {
        let encoded = [254, b'a', 255, b'b', 2, b'c', b'b', b'c', 254, b'a', 128];
        let decoded = run_length_decode(&encoded).unwrap();
        assert_eq!(b"aaabbcbcaaa", decoded.as_slice());
    }
}