rpdfium_codec/basic/
mod.rs

// Derived from PDFium's basic/basicmodule.h/cpp
// Original: Copyright 2014 The PDFium Authors
// Licensed under BSD-3-Clause / Apache-2.0
// See pdfium-upstream/LICENSE for the original license.
5
//! Basic stream filters — ASCII85, ASCIIHex, RunLength.
//!
//! All three filters are consolidated here, mirroring PDFium's single
//! `core/fxcodec/basic/basicmodule.h/cpp` source file.
//!
//! | PDFium `BasicModule` | rpdfium |
//! |---|---|
//! | `A85Encode()` | [`ascii85::encode`] |
//! | *(decode via codec pipeline)* | [`ascii85::decode`] |
//! | *(decode via codec pipeline)* | [`ascii_hex::decode`] |
//! | `RunLengthEncode()` | [`run_length::encode`] |
//! | *(decode via codec pipeline)* | [`run_length::decode`] |
//! | `CreateRunLengthDecoder()` | [`run_length::create_decoder`] |
19
20// ---------------------------------------------------------------------------
21// ascii85
22// ---------------------------------------------------------------------------
23
24pub mod ascii85 {
25    //! ASCII85Decode / ASCII85Encode filter.
26
27    use crate::error::DecodeError;
28
29    /// Encode binary data as ASCII85 (base-85).
30    ///
31    /// - Groups of 4 bytes encode to 5 ASCII characters in `!`–`u` range.
32    /// - A group of 4 zero bytes encodes to the single character `z`.
33    /// - A line break (`\r\n`) is inserted every 75 output characters.
34    /// - Terminated with `~>`.
35    ///
36    /// Mirrors PDFium's `BasicModule::A85Encode()`.
37    pub fn encode(input: &[u8]) -> Vec<u8> {
38        if input.is_empty() {
39            return Vec::new();
40        }
41
42        // Worst case: 5/4 expansion + line breaks + "~>" terminator
43        let estimated = (input.len() / 4 + 1) * 5 + input.len() / 30 + 4;
44        let mut output = Vec::with_capacity(estimated);
45        let mut line_length = 0usize;
46        let mut pos = 0usize;
47
48        // Process full 4-byte groups
49        while pos + 4 <= input.len() {
50            let val =
51                u32::from_be_bytes([input[pos], input[pos + 1], input[pos + 2], input[pos + 3]]);
52            pos += 4;
53
54            if val == 0 {
55                // All-zero special case: 'z'
56                output.push(b'z');
57                line_length += 1;
58            } else {
59                let mut v = val as u64;
60                let mut chars = [0u8; 5];
61                for c in chars.iter_mut().rev() {
62                    *c = (v % 85) as u8 + 33;
63                    v /= 85;
64                }
65                output.extend_from_slice(&chars);
66                line_length += 5;
67            }
68
69            if line_length >= 75 {
70                output.extend_from_slice(b"\r\n");
71                line_length = 0;
72            }
73        }
74
75        // Leftover bytes (1–3): pad to 4 bytes, output count+1 digits
76        if pos < input.len() {
77            let count = input.len() - pos;
78            let mut val = 0u32;
79            for k in 0..count {
80                val |= u32::from(input[pos + k]) << (8 * (3 - k));
81            }
82            let mut v = val as u64;
83            let mut chars = [0u8; 5];
84            // Compute all 5 digits (most-to-least significant at indices 0–4)
85            // but only write the first count+1 of them.
86            for i in (0..5usize).rev() {
87                if i <= count {
88                    chars[i] = (v % 85) as u8 + 33;
89                }
90                v /= 85;
91            }
92            output.extend_from_slice(&chars[..count + 1]);
93        }
94
95        output.extend_from_slice(b"~>");
96        output
97    }
98
99    /// Decode ASCII85 (base-85) encoded data.
100    ///
101    /// - Groups of 5 ASCII characters in the range `!` (33) to `u` (117)
102    ///   decode to 4 binary bytes.
103    /// - The character `z` is a shortcut for 4 zero bytes.
104    /// - The end-of-data marker is `~>`.
105    /// - Whitespace is ignored.
106    pub fn decode(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
107        let mut output = Vec::with_capacity(input.len() * 4 / 5);
108        let mut group = [0u8; 5];
109        let mut count = 0usize;
110
111        let mut i = 0;
112        while i < input.len() {
113            let b = input[i];
114            i += 1;
115
116            // Whitespace — skip
117            if b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' || b == b'\x0C' {
118                continue;
119            }
120
121            // End-of-data marker
122            if b == b'~' {
123                if i < input.len() && input[i] == b'>' {
124                    break; // consume '>'
125                }
126                break; // '~' without '>' — lenient
127            }
128
129            // 'z' shortcut for four zero bytes
130            if b == b'z' {
131                if count != 0 {
132                    return Err(DecodeError::InvalidInput(
133                        "ASCII85: 'z' inside a group".into(),
134                    ));
135                }
136                output.extend_from_slice(&[0, 0, 0, 0]);
137                continue;
138            }
139
140            // Valid ASCII85 character range: '!' (33) to 'u' (117)
141            if !(b'!'..=b'u').contains(&b) {
142                return Err(DecodeError::InvalidInput(format!(
143                    "ASCII85: invalid character 0x{b:02X}"
144                )));
145            }
146
147            group[count] = b - b'!';
148            count += 1;
149
150            if count == 5 {
151                let value = u64::from(group[0]) * 85 * 85 * 85 * 85
152                    + u64::from(group[1]) * 85 * 85 * 85
153                    + u64::from(group[2]) * 85 * 85
154                    + u64::from(group[3]) * 85
155                    + u64::from(group[4]);
156
157                if value > u64::from(u32::MAX) {
158                    return Err(DecodeError::InvalidInput(
159                        "ASCII85: group value exceeds 2^32-1".into(),
160                    ));
161                }
162
163                let value = value as u32;
164                output.push((value >> 24) as u8);
165                output.push((value >> 16) as u8);
166                output.push((value >> 8) as u8);
167                output.push(value as u8);
168                count = 0;
169            }
170        }
171
172        // Handle remaining partial group (1-4 chars)
173        if count > 0 {
174            if count == 1 {
175                return Err(DecodeError::InvalidInput(
176                    "ASCII85: single trailing character is invalid".into(),
177                ));
178            }
179
180            // Pad with 'u' (84) to make a full group
181            for slot in group.iter_mut().skip(count) {
182                *slot = 84; // 'u' - '!' = 84
183            }
184
185            let value = u64::from(group[0]) * 85 * 85 * 85 * 85
186                + u64::from(group[1]) * 85 * 85 * 85
187                + u64::from(group[2]) * 85 * 85
188                + u64::from(group[3]) * 85
189                + u64::from(group[4]);
190
191            if value > u64::from(u32::MAX) {
192                return Err(DecodeError::InvalidInput(
193                    "ASCII85: padded group value exceeds 2^32-1".into(),
194                ));
195            }
196
197            let value = value as u32;
198            let bytes = value.to_be_bytes();
199            output.extend_from_slice(&bytes[..count - 1]);
200        }
201
202        Ok(output)
203    }
204
205    #[cfg(test)]
206    mod tests {
207        use super::*;
208
209        // --- encode ---
210
211        #[test]
212        fn encode_empty() {
213            assert!(encode(&[]).is_empty());
214        }
215
216        #[test]
217        fn encode_basic() {
218            assert_eq!(encode(b"Man "), b"9jqo^~>");
219        }
220
221        #[test]
222        fn encode_all_zeros() {
223            assert_eq!(encode(&[0, 0, 0, 0]), b"z~>");
224        }
225
226        #[test]
227        fn encode_partial_group() {
228            let result = encode(b"a");
229            assert!(result.ends_with(b"~>"));
230            assert_eq!(result.len(), 4); // 2 digits + "~>"
231        }
232
233        #[test]
234        fn encode_decode_roundtrip() {
235            let original = b"Hello, World! This is a test of ASCII85.";
236            let decoded = decode(&encode(original)).unwrap();
237            assert_eq!(decoded, original);
238        }
239
240        #[test]
241        fn encode_decode_roundtrip_all_bytes() {
242            let original: Vec<u8> = (0u8..=255).collect();
243            assert_eq!(decode(&encode(&original)).unwrap(), original);
244        }
245
246        #[test]
247        fn encode_line_break_at_75() {
248            let input = vec![0xABu8; 80];
249            let encoded = encode(&input);
250            assert!(encoded.windows(2).any(|w| w == b"\r\n"));
251        }
252
253        // --- decode ---
254
255        #[test]
256        fn decode_basic() {
257            assert_eq!(decode(b"9jqo^~>").unwrap(), b"Man ");
258        }
259
260        #[test]
261        fn decode_z_shortcut() {
262            assert_eq!(decode(b"z~>").unwrap(), vec![0, 0, 0, 0]);
263        }
264
265        #[test]
266        fn decode_z_shortcut_multiple() {
267            assert_eq!(decode(b"zz~>").unwrap(), vec![0u8; 8]);
268        }
269
270        #[test]
271        fn decode_whitespace_ignored() {
272            assert_eq!(decode(b"9 jqo ^\n~>").unwrap(), b"Man ");
273        }
274
275        #[test]
276        fn decode_empty() {
277            assert!(decode(b"~>").unwrap().is_empty());
278        }
279
280        #[test]
281        fn decode_partial_group() {
282            let result = decode(b"9jqo^BlbD~>").unwrap();
283            assert_eq!(&result[..4], b"Man ");
284            assert_eq!(result.len(), 7);
285        }
286
287        #[test]
288        fn decode_single_trailing_char_invalid() {
289            assert!(decode(b"A~>").is_err());
290        }
291
292        #[test]
293        fn decode_invalid_char() {
294            assert!(decode(b"9jqo^{~>").is_err());
295        }
296
297        #[test]
298        fn decode_z_inside_group_error() {
299            assert!(decode(b"Az~>").is_err());
300        }
301
302        #[test]
303        fn decode_no_eod_marker() {
304            assert_eq!(decode(b"9jqo^").unwrap(), b"Man ");
305        }
306
307        #[test]
308        fn decode_known_string() {
309            assert_eq!(decode(b"87cURDe~>").unwrap(), b"Hello");
310        }
311    }
312}
313
314// ---------------------------------------------------------------------------
315// ascii_hex
316// ---------------------------------------------------------------------------
317
318pub mod ascii_hex {
319    //! ASCIIHexDecode filter.
320    //!
321    //! Note: PDFium's `BasicModule` does not include an ASCIIHex encoder;
322    //! this decode-only module is an rpdfium extension.
323
324    use crate::error::DecodeError;
325
326    /// Decode ASCIIHex-encoded data.
327    ///
328    /// - Pairs of hexadecimal digits decode to bytes.
329    /// - `>` marks end of data.
330    /// - Whitespace is ignored.
331    /// - An odd trailing nibble is zero-padded on the right (e.g., `A` → `0xA0`).
332    pub fn decode(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
333        let mut output = Vec::with_capacity(input.len() / 2);
334        let mut high_nibble: Option<u8> = None;
335
336        for &b in input {
337            if b == b'>' {
338                break;
339            }
340            if b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' || b == b'\x0C' {
341                continue;
342            }
343
344            let nibble = hex_nibble(b)?;
345            match high_nibble.take() {
346                None => high_nibble = Some(nibble),
347                Some(high) => output.push((high << 4) | nibble),
348            }
349        }
350
351        // Odd trailing nibble: pad with 0 on the right
352        if let Some(high) = high_nibble {
353            output.push(high << 4);
354        }
355
356        Ok(output)
357    }
358
359    fn hex_nibble(b: u8) -> Result<u8, crate::error::DecodeError> {
360        match b {
361            b'0'..=b'9' => Ok(b - b'0'),
362            b'a'..=b'f' => Ok(b - b'a' + 10),
363            b'A'..=b'F' => Ok(b - b'A' + 10),
364            _ => Err(crate::error::DecodeError::InvalidInput(format!(
365                "ASCIIHex: invalid hex character 0x{b:02X}"
366            ))),
367        }
368    }
369
370    #[cfg(test)]
371    mod tests {
372        use super::*;
373
374        #[test]
375        fn decode_basic() {
376            assert_eq!(decode(b"48656C6C6F>").unwrap(), b"Hello");
377        }
378
379        #[test]
380        fn decode_lowercase() {
381            assert_eq!(decode(b"48656c6c6f>").unwrap(), b"Hello");
382        }
383
384        #[test]
385        fn decode_whitespace_ignored() {
386            assert_eq!(decode(b"48 65 6C\n6C 6F>").unwrap(), b"Hello");
387        }
388
389        #[test]
390        fn decode_odd_nibble() {
391            assert_eq!(decode(b"A>").unwrap(), vec![0xA0]);
392        }
393
394        #[test]
395        fn decode_empty() {
396            assert!(decode(b">").unwrap().is_empty());
397        }
398
399        #[test]
400        fn decode_no_eod_marker() {
401            assert_eq!(decode(b"4865").unwrap(), b"He");
402        }
403
404        #[test]
405        fn decode_invalid_char() {
406            assert!(decode(b"4G>").is_err());
407        }
408
409        #[test]
410        fn decode_all_zeros() {
411            assert_eq!(decode(b"0000>").unwrap(), vec![0, 0]);
412        }
413
414        #[test]
415        fn decode_all_ff() {
416            assert_eq!(decode(b"FFFF>").unwrap(), vec![0xFF, 0xFF]);
417        }
418    }
419}
420
421// ---------------------------------------------------------------------------
422// run_length
423// ---------------------------------------------------------------------------
424
425pub mod run_length {
426    //! RunLengthDecode / RunLengthEncode filter and scanline decoder.
427    //!
428    //! Mirrors PDFium's `BasicModule::RunLengthEncode()`,
429    //! `BasicModule::CreateRunLengthDecoder()`, and the internal
430    //! `RLScanlineDecoder`.
431
432    use crate::error::DecodeError;
433    use crate::scanline::ScanlineDecoder;
434
435    // -----------------------------------------------------------------------
436    // Batch decode
437    // -----------------------------------------------------------------------
438
439    /// Decode run-length encoded data.
440    ///
441    /// - Length byte 0–127: copy the next `length + 1` bytes literally.
442    /// - Length byte 129–255: repeat the next byte `257 - length` times.
443    /// - Length byte 128: end of data (EOD).
444    pub fn decode(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
445        let mut output = Vec::new();
446        let mut i = 0;
447
448        while i < input.len() {
449            let length = input[i];
450            i += 1;
451
452            if length == 128 {
453                break; // EOD
454            }
455
456            if length <= 127 {
457                let count = usize::from(length) + 1;
458                if i + count > input.len() {
459                    return Err(DecodeError::InvalidInput(
460                        "RunLength: literal run extends past end of input".into(),
461                    ));
462                }
463                output.extend_from_slice(&input[i..i + count]);
464                i += count;
465            } else {
466                if i >= input.len() {
467                    return Err(DecodeError::InvalidInput(
468                        "RunLength: repeat run missing data byte".into(),
469                    ));
470                }
471                let count = 257 - usize::from(length);
472                let byte = input[i];
473                i += 1;
474                output.resize(output.len() + count, byte);
475            }
476        }
477
478        Ok(output)
479    }
480
481    // -----------------------------------------------------------------------
482    // Encode
483    // -----------------------------------------------------------------------
484
485    /// Encode data using run-length encoding.
486    ///
487    /// - Matched runs of 2–128 identical bytes → `[257 - run_len, byte]`.
488    /// - Mismatched runs of 1–128 distinct bytes → `[run_len - 1, b0, b1, ...]`.
489    /// - Terminated with the EOD marker `128`.
490    ///
491    /// Mirrors PDFium's `BasicModule::RunLengthEncode()`.
492    pub fn encode(input: &[u8]) -> Vec<u8> {
493        if input.is_empty() {
494            return vec![];
495        }
496        if input.len() == 1 {
497            return vec![0, input[0], 128];
498        }
499
500        // Worst case: 4 output bytes per 3 input (plus terminator).
501        let estimated = input.len().div_ceil(3) * 4 + 1;
502        let mut result = vec![0u8; estimated];
503        let mut wpos = 0usize;
504
505        let mut run_start = 0usize;
506        let mut run_end = 1usize;
507        let mut x = input[run_start];
508        let mut y = input[run_end];
509
510        while run_end < input.len() {
511            let max_len = 128usize.min(input.len() - run_start);
512
513            while x == y && run_end - run_start < max_len - 1 {
514                run_end += 1;
515                y = input[run_end];
516            }
517
518            if x == y {
519                run_end += 1;
520                if run_end < input.len() {
521                    y = input[run_end];
522                }
523            }
524
525            if run_end - run_start > 1 {
526                result[wpos] = (257 - (run_end - run_start)) as u8;
527                result[wpos + 1] = x;
528                wpos += 2;
529                x = y;
530                run_start = run_end;
531                run_end += 1;
532                if run_end < input.len() {
533                    y = input[run_end];
534                }
535                continue;
536            }
537
538            while x != y && run_end <= run_start + max_len {
539                result[wpos + (run_end - run_start)] = x;
540                x = y;
541                run_end += 1;
542                if run_end == input.len() {
543                    if run_end <= run_start + max_len {
544                        result[wpos + (run_end - run_start)] = x;
545                        run_end += 1;
546                    }
547                    break;
548                }
549                y = input[run_end];
550            }
551            result[wpos] = (run_end - run_start - 2) as u8;
552            wpos += run_end - run_start;
553            run_start = run_end - 1;
554        }
555
556        if run_start < input.len() {
557            result[wpos] = 0;
558            result[wpos + 1] = x;
559            wpos += 2;
560        }
561
562        result[wpos] = 128; // EOD
563        result.truncate(wpos + 1);
564        result
565    }
566
567    // -----------------------------------------------------------------------
568    // ScanlineDecoder (RLScanlineDecoder port)
569    // -----------------------------------------------------------------------
570
571    /// State of the current run-length operator between scanlines.
572    #[derive(Clone, Copy)]
573    enum RunOp {
574        Literal { remaining: usize },
575        Repeat { remaining: usize, byte: u8 },
576        Eod,
577        Init,
578    }
579
580    /// Scanline-based RunLength decoder.
581    ///
582    /// Yields one decoded scanline at a time, maintaining run-length state
583    /// across scanline boundaries. Mirrors PDFium's `RLScanlineDecoder`.
584    ///
585    /// Construct via [`create_decoder`].
586    pub struct RunLengthScanlineDecoder {
587        src: Vec<u8>,
588        width: u32,
589        height: u32,
590        comps: u8,
591        bpc: u8,
592        row_stride: usize,
593        scanline: Vec<u8>,
594        src_offset: usize,
595        op: RunOp,
596        lines_decoded: usize,
597    }
598
599    /// Create a scanline decoder for RunLength-encoded image data.
600    ///
601    /// Validates that `src` contains enough bytes to decode `width × height`
602    /// pixels, then returns a decoder ready to yield one scanline per call.
603    ///
604    /// Mirrors PDFium's `BasicModule::CreateRunLengthDecoder()`.
605    pub fn create_decoder(
606        src: &[u8],
607        width: u32,
608        height: u32,
609        comps: u8,
610        bpc: u8,
611    ) -> Result<RunLengthScanlineDecoder, DecodeError> {
612        let row_stride = (width as usize * comps as usize * bpc as usize).div_ceil(8);
613        let decoder = RunLengthScanlineDecoder {
614            src: src.to_vec(),
615            width,
616            height,
617            comps,
618            bpc,
619            row_stride,
620            scanline: vec![0u8; row_stride],
621            src_offset: 0,
622            op: RunOp::Init,
623            lines_decoded: 0,
624        };
625        if !decoder.check_dest_size() {
626            return Err(DecodeError::InvalidInput(
627                "RunLength: compressed data too small for image dimensions".into(),
628            ));
629        }
630        Ok(decoder)
631    }
632
633    impl RunLengthScanlineDecoder {
634        /// Pre-validate that `src` decodes to at least the required number of
635        /// bytes. Mirrors `RLScanlineDecoder::CheckDestSize()`.
636        fn check_dest_size(&self) -> bool {
637            let mut i = 0usize;
638            let mut dest: u64 = 0;
639            while i < self.src.len() {
640                let b = self.src[i];
641                if b < 128 {
642                    dest += b as u64 + 1;
643                    i += b as usize + 2;
644                } else if b > 128 {
645                    dest += 257 - b as u64;
646                    i += 2;
647                } else {
648                    break; // EOD
649                }
650            }
651            let required =
652                self.width as u64 * self.comps as u64 * self.bpc as u64 * self.height as u64;
653            dest * 8 >= required
654        }
655
656        fn read_next_op(&mut self) {
657            if self.src_offset >= self.src.len() {
658                self.op = RunOp::Eod;
659                return;
660            }
661            let b = self.src[self.src_offset];
662            self.src_offset += 1;
663            self.op = if b < 128 {
664                RunOp::Literal {
665                    remaining: b as usize + 1,
666                }
667            } else if b > 128 {
668                let data_byte = if self.src_offset < self.src.len() {
669                    self.src[self.src_offset]
670                } else {
671                    0
672                };
673                RunOp::Repeat {
674                    remaining: 257 - b as usize,
675                    byte: data_byte,
676                }
677            } else {
678                RunOp::Eod
679            };
680        }
681
682        /// Mirrors `RLScanlineDecoder::GetNextLine()`.
683        fn fill_scanline(&mut self) -> bool {
684            if matches!(self.op, RunOp::Init) {
685                self.read_next_op();
686            }
687            if matches!(self.op, RunOp::Eod) {
688                return false;
689            }
690
691            self.scanline.fill(0);
692            let mut col = 0usize;
693
694            loop {
695                let space = self.row_stride - col;
696                if space == 0 {
697                    break;
698                }
699
700                match self.op {
701                    RunOp::Literal { remaining } => {
702                        let src_left = self.src.len().saturating_sub(self.src_offset);
703                        let copy = remaining.min(space).min(src_left);
704                        let src_end = self.src_offset + copy;
705                        self.scanline[col..col + copy]
706                            .copy_from_slice(&self.src[self.src_offset..src_end]);
707                        col += copy;
708                        self.src_offset += copy;
709                        let new_remaining = remaining - copy;
710                        if new_remaining == 0 {
711                            self.read_next_op();
712                        } else {
713                            self.op = RunOp::Literal {
714                                remaining: new_remaining,
715                            };
716                            break;
717                        }
718                    }
719                    RunOp::Repeat { remaining, byte } => {
720                        let copy = remaining.min(space);
721                        self.scanline[col..col + copy].fill(byte);
722                        col += copy;
723                        let new_remaining = remaining - copy;
724                        if new_remaining == 0 {
725                            self.src_offset += 1;
726                            self.read_next_op();
727                        } else {
728                            self.op = RunOp::Repeat {
729                                remaining: new_remaining,
730                                byte,
731                            };
732                            break;
733                        }
734                    }
735                    RunOp::Eod | RunOp::Init => break,
736                }
737
738                if matches!(self.op, RunOp::Eod) {
739                    break;
740                }
741            }
742
743            true
744        }
745    }
746
747    impl ScanlineDecoder for RunLengthScanlineDecoder {
748        fn width(&self) -> u32 {
749            self.width
750        }
751        fn height(&self) -> u32 {
752            self.height
753        }
754        fn count_comps(&self) -> u8 {
755            self.comps
756        }
757        fn bpc(&self) -> u8 {
758            self.bpc
759        }
760        fn row_stride(&self) -> usize {
761            self.row_stride
762        }
763
764        fn decode_scanline(&mut self) -> Result<Option<&[u8]>, DecodeError> {
765            if self.lines_decoded >= self.height as usize {
766                return Ok(None);
767            }
768            if !self.fill_scanline() {
769                return Ok(None);
770            }
771            self.lines_decoded += 1;
772            Ok(Some(&self.scanline))
773        }
774
775        fn reset(&mut self) -> Result<(), DecodeError> {
776            self.scanline.fill(0);
777            self.src_offset = 0;
778            self.op = RunOp::Init;
779            self.lines_decoded = 0;
780            Ok(())
781        }
782
783        fn current_line(&self) -> Option<usize> {
784            if self.lines_decoded == 0 {
785                None
786            } else {
787                Some(self.lines_decoded - 1)
788            }
789        }
790    }
791
792    #[cfg(test)]
793    mod tests {
794        use super::*;
795
796        // --- decode ---
797
798        #[test]
799        fn decode_literal_run() {
800            assert_eq!(decode(&[2, b'A', b'B', b'C', 128]).unwrap(), b"ABC");
801        }
802
803        #[test]
804        fn decode_repeat_run() {
805            assert_eq!(decode(&[253, b'X', 128]).unwrap(), b"XXXX");
806        }
807
808        #[test]
809        fn decode_mixed_runs() {
810            assert_eq!(decode(&[1, b'A', b'B', 254, b'Z', 128]).unwrap(), b"ABZZZ");
811        }
812
813        #[test]
814        fn decode_eod_marker() {
815            assert!(decode(&[128]).unwrap().is_empty());
816        }
817
818        #[test]
819        fn decode_empty_input() {
820            assert!(decode(&[]).unwrap().is_empty());
821        }
822
823        #[test]
824        fn decode_truncated_literal() {
825            assert!(decode(&[1, b'A']).is_err());
826        }
827
828        #[test]
829        fn decode_truncated_repeat() {
830            assert!(decode(&[255]).is_err());
831        }
832
833        #[test]
834        fn decode_single_literal() {
835            assert_eq!(decode(&[0, b'Q', 128]).unwrap(), b"Q");
836        }
837
838        #[test]
839        fn decode_max_repeat() {
840            let result = decode(&[129, b'Y', 128]).unwrap();
841            assert_eq!(result.len(), 128);
842            assert!(result.iter().all(|&b| b == b'Y'));
843        }
844
845        // --- encode ---
846
847        #[test]
848        fn encode_empty() {
849            assert!(encode(&[]).is_empty());
850        }
851
852        #[test]
853        fn encode_single_byte() {
854            assert_eq!(encode(&[b'A']), &[0, b'A', 128]);
855        }
856
857        #[test]
858        fn encode_eod_terminator() {
859            assert_eq!(*encode(b"Hello").last().unwrap(), 128);
860        }
861
862        #[test]
863        fn encode_decode_roundtrip_literal() {
864            let original = b"ABCDEFGH";
865            assert_eq!(decode(&encode(original)).unwrap(), original);
866        }
867
868        #[test]
869        fn encode_decode_roundtrip_repeated() {
870            let original = vec![b'X'; 64];
871            let encoded = encode(&original);
872            assert!(encoded.len() < original.len());
873            assert_eq!(decode(&encoded).unwrap(), original);
874        }
875
876        #[test]
877        fn encode_decode_roundtrip_all_bytes() {
878            let original: Vec<u8> = (0u8..=255).collect();
879            assert_eq!(decode(&encode(&original)).unwrap(), original);
880        }
881
882        #[test]
883        fn encode_max_repeat_128() {
884            let original = vec![b'Z'; 128];
885            assert_eq!(decode(&encode(&original)).unwrap(), original);
886        }
887
888        // --- RunLengthScanlineDecoder ---
889
890        fn make_rl_image(rows: &[&[u8]]) -> Vec<u8> {
891            let mut encoded = Vec::new();
892            for row in rows {
893                for chunk in row.chunks(128) {
894                    encoded.push((chunk.len() - 1) as u8);
895                    encoded.extend_from_slice(chunk);
896                }
897            }
898            encoded.push(128);
899            encoded
900        }
901
902        #[test]
903        fn rl_scanline_basic() {
904            let src = make_rl_image(&[&[1u8, 2, 3], &[4u8, 5, 6]]);
905            let mut dec = create_decoder(&src, 3, 2, 1, 8).unwrap();
906
907            assert_eq!(dec.width(), 3);
908            assert_eq!(dec.height(), 2);
909            assert_eq!(dec.count_comps(), 1);
910            assert_eq!(dec.bpc(), 8);
911            assert_eq!(dec.row_stride(), 3);
912
913            assert_eq!(dec.decode_scanline().unwrap().unwrap(), &[1, 2, 3]);
914            assert_eq!(dec.current_line(), Some(0));
915            assert_eq!(dec.decode_scanline().unwrap().unwrap(), &[4, 5, 6]);
916            assert_eq!(dec.current_line(), Some(1));
917            assert!(dec.decode_scanline().unwrap().is_none());
918        }
919
920        #[test]
921        fn rl_scanline_repeat_run() {
922            let src = [253u8, b'A', 128];
923            let mut dec = create_decoder(&src, 4, 1, 1, 8).unwrap();
924            assert_eq!(
925                dec.decode_scanline().unwrap().unwrap(),
926                &[b'A', b'A', b'A', b'A']
927            );
928            assert!(dec.decode_scanline().unwrap().is_none());
929        }
930
931        #[test]
932        fn rl_scanline_reset() {
933            let src = make_rl_image(&[&[1u8, 2, 3, 4], &[1u8, 2, 3, 4]]);
934            let mut dec = create_decoder(&src, 4, 2, 1, 8).unwrap();
935            let first = dec.decode_scanline().unwrap().unwrap().to_vec();
936            dec.reset().unwrap();
937            assert_eq!(dec.current_line(), None);
938            assert_eq!(dec.decode_scanline().unwrap().unwrap(), first);
939        }
940
941        #[test]
942        fn rl_scanline_current_line_before_read() {
943            let src = make_rl_image(&[&[0u8, 0, 0]]);
944            assert_eq!(
945                create_decoder(&src, 3, 1, 1, 8).unwrap().current_line(),
946                None
947            );
948        }
949
950        #[test]
951        fn rl_scanline_too_small_returns_error() {
952            let src = [0u8, 42, 128];
953            assert!(create_decoder(&src, 2, 2, 1, 8).is_err());
954        }
955
956        #[test]
957        fn rl_scanline_run_spanning_rows() {
958            let src = [251u8, b'X', 128]; // 257-251=6 × 'X', 3 bytes/row
959            let mut dec = create_decoder(&src, 3, 2, 1, 8).unwrap();
960            assert_eq!(dec.decode_scanline().unwrap().unwrap(), &[b'X', b'X', b'X']);
961            assert_eq!(dec.decode_scanline().unwrap().unwrap(), &[b'X', b'X', b'X']);
962            assert!(dec.decode_scanline().unwrap().is_none());
963        }
964    }
965}
rpdfium_codec/basic/mod.rs

rpdfium_codec/basic/
mod.rs