rpdfium_codec/basic/
mod.rs

1// Derived from PDFium's basic/basicmodule.h/cpp
2// Original: Copyright 2014 The PDFium Authors
3// Licensed under BSD-3-Clause / Apache-2.0
4// See pdfium-upstream/LICENSE for the original license.
5
6//! Basic stream filters — ASCII85, ASCIIHex, RunLength.
7//!
8//! All three filters are consolidated here, mirroring PDFium's single
9//! `core/fxcodec/basic/basicmodule.h/cpp` source file.
10//!
11//! | PDFium `BasicModule` | rpdfium |
12//! |---|---|
13//! | `A85Encode()` | [`ascii85::encode`] |
14//! | *(decode via codec pipeline)* | [`ascii85::decode`] |
15//! | *(decode via codec pipeline)* | [`ascii_hex::decode`] |
16//! | `RunLengthEncode()` | [`run_length::encode`] |
17//! | *(decode via codec pipeline)* | [`run_length::decode`] |
18//! | `CreateRunLengthDecoder()` | [`run_length::create_decoder`] |
19
20// ---------------------------------------------------------------------------
21// ascii85
22// ---------------------------------------------------------------------------
23
24pub mod ascii85 {
25    //! ASCII85Decode / ASCII85Encode filter.
26
27    use crate::error::DecodeError;
28
29    /// Encode binary data as ASCII85 (base-85).
30    ///
31    /// - Groups of 4 bytes encode to 5 ASCII characters in `!`–`u` range.
32    /// - A group of 4 zero bytes encodes to the single character `z`.
33    /// - A line break (`\r\n`) is inserted every 75 output characters.
34    /// - Terminated with `~>`.
35    ///
36    /// Mirrors PDFium's `BasicModule::A85Encode()`.
37    pub fn encode(input: &[u8]) -> Vec<u8> {
38        if input.is_empty() {
39            return Vec::new();
40        }
41
42        // Worst case: 5/4 expansion + line breaks + "~>" terminator
43        let estimated = (input.len() / 4 + 1) * 5 + input.len() / 30 + 4;
44        let mut output = Vec::with_capacity(estimated);
45        let mut line_length = 0usize;
46        let mut pos = 0usize;
47
48        // Process full 4-byte groups
49        while pos + 4 <= input.len() {
50            let val =
51                u32::from_be_bytes([input[pos], input[pos + 1], input[pos + 2], input[pos + 3]]);
52            pos += 4;
53
54            if val == 0 {
55                // All-zero special case: 'z'
56                output.push(b'z');
57                line_length += 1;
58            } else {
59                let mut v = val as u64;
60                let mut chars = [0u8; 5];
61                for c in chars.iter_mut().rev() {
62                    *c = (v % 85) as u8 + 33;
63                    v /= 85;
64                }
65                output.extend_from_slice(&chars);
66                line_length += 5;
67            }
68
69            if line_length >= 75 {
70                output.extend_from_slice(b"\r\n");
71                line_length = 0;
72            }
73        }
74
75        // Leftover bytes (1–3): pad to 4 bytes, output count+1 digits
76        if pos < input.len() {
77            let count = input.len() - pos;
78            let mut val = 0u32;
79            for k in 0..count {
80                val |= u32::from(input[pos + k]) << (8 * (3 - k));
81            }
82            let mut v = val as u64;
83            let mut chars = [0u8; 5];
84            // Compute all 5 digits (most-to-least significant at indices 0–4)
85            // but only write the first count+1 of them.
86            for i in (0..5usize).rev() {
87                if i <= count {
88                    chars[i] = (v % 85) as u8 + 33;
89                }
90                v /= 85;
91            }
92            output.extend_from_slice(&chars[..count + 1]);
93        }
94
95        output.extend_from_slice(b"~>");
96        output
97    }
98
99    /// Decode ASCII85 (base-85) encoded data.
100    ///
101    /// - Groups of 5 ASCII characters in the range `!` (33) to `u` (117)
102    ///   decode to 4 binary bytes.
103    /// - The character `z` is a shortcut for 4 zero bytes.
104    /// - The end-of-data marker is `~>`.
105    /// - Whitespace is ignored.
106    pub fn decode(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
107        let mut output = Vec::with_capacity(input.len() * 4 / 5);
108        let mut group = [0u8; 5];
109        let mut count = 0usize;
110
111        let mut i = 0;
112        while i < input.len() {
113            let b = input[i];
114            i += 1;
115
116            // Whitespace — skip
117            if b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' || b == b'\x0C' {
118                continue;
119            }
120
121            // End-of-data marker
122            if b == b'~' {
123                if i < input.len() && input[i] == b'>' {
124                    break; // consume '>'
125                }
126                break; // '~' without '>' — lenient
127            }
128
129            // 'z' shortcut for four zero bytes
130            if b == b'z' {
131                if count != 0 {
132                    return Err(DecodeError::InvalidInput(
133                        "ASCII85: 'z' inside a group".into(),
134                    ));
135                }
136                output.extend_from_slice(&[0, 0, 0, 0]);
137                continue;
138            }
139
140            // Valid ASCII85 character range: '!' (33) to 'u' (117)
141            if !(b'!'..=b'u').contains(&b) {
142                return Err(DecodeError::InvalidInput(format!(
143                    "ASCII85: invalid character 0x{b:02X}"
144                )));
145            }
146
147            group[count] = b - b'!';
148            count += 1;
149
150            if count == 5 {
151                let value = u64::from(group[0]) * 85 * 85 * 85 * 85
152                    + u64::from(group[1]) * 85 * 85 * 85
153                    + u64::from(group[2]) * 85 * 85
154                    + u64::from(group[3]) * 85
155                    + u64::from(group[4]);
156
157                if value > u64::from(u32::MAX) {
158                    return Err(DecodeError::InvalidInput(
159                        "ASCII85: group value exceeds 2^32-1".into(),
160                    ));
161                }
162
163                let value = value as u32;
164                output.push((value >> 24) as u8);
165                output.push((value >> 16) as u8);
166                output.push((value >> 8) as u8);
167                output.push(value as u8);
168                count = 0;
169            }
170        }
171
172        // Handle remaining partial group (1-4 chars)
173        if count > 0 {
174            if count == 1 {
175                return Err(DecodeError::InvalidInput(
176                    "ASCII85: single trailing character is invalid".into(),
177                ));
178            }
179
180            // Pad with 'u' (84) to make a full group
181            for slot in group.iter_mut().skip(count) {
182                *slot = 84; // 'u' - '!' = 84
183            }
184
185            let value = u64::from(group[0]) * 85 * 85 * 85 * 85
186                + u64::from(group[1]) * 85 * 85 * 85
187                + u64::from(group[2]) * 85 * 85
188                + u64::from(group[3]) * 85
189                + u64::from(group[4]);
190
191            if value > u64::from(u32::MAX) {
192                return Err(DecodeError::InvalidInput(
193                    "ASCII85: padded group value exceeds 2^32-1".into(),
194                ));
195            }
196
197            let value = value as u32;
198            let bytes = value.to_be_bytes();
199            output.extend_from_slice(&bytes[..count - 1]);
200        }
201
202        Ok(output)
203    }
204
205    #[cfg(test)]
206    mod tests {
207        use super::*;
208
209        // --- encode ---
210
211        #[test]
212        fn test_encode_empty() {
213            assert!(encode(&[]).is_empty());
214        }
215
216        #[test]
217        fn test_encode_basic() {
218            assert_eq!(encode(b"Man "), b"9jqo^~>");
219        }
220
221        #[test]
222        fn test_encode_all_zeros() {
223            assert_eq!(encode(&[0, 0, 0, 0]), b"z~>");
224        }
225
226        #[test]
227        fn test_encode_partial_group() {
228            let result = encode(b"a");
229            assert!(result.ends_with(b"~>"));
230            assert_eq!(result.len(), 4); // 2 digits + "~>"
231        }
232
233        #[test]
234        fn test_encode_decode_roundtrip() {
235            let original = b"Hello, World! This is a test of ASCII85.";
236            let decoded = decode(&encode(original)).unwrap();
237            assert_eq!(decoded, original);
238        }
239
240        #[test]
241        fn test_encode_decode_roundtrip_all_bytes() {
242            let original: Vec<u8> = (0u8..=255).collect();
243            assert_eq!(decode(&encode(&original)).unwrap(), original);
244        }
245
246        #[test]
247        fn test_encode_line_break_at_75() {
248            let input = vec![0xABu8; 80];
249            let encoded = encode(&input);
250            assert!(encoded.windows(2).any(|w| w == b"\r\n"));
251        }
252
253        // --- decode ---
254
255        #[test]
256        fn test_decode_basic() {
257            assert_eq!(decode(b"9jqo^~>").unwrap(), b"Man ");
258        }
259
260        #[test]
261        fn test_decode_z_shortcut() {
262            assert_eq!(decode(b"z~>").unwrap(), vec![0, 0, 0, 0]);
263        }
264
265        #[test]
266        fn test_decode_z_shortcut_multiple() {
267            assert_eq!(decode(b"zz~>").unwrap(), vec![0u8; 8]);
268        }
269
270        #[test]
271        fn test_decode_whitespace_ignored() {
272            assert_eq!(decode(b"9 jqo ^\n~>").unwrap(), b"Man ");
273        }
274
275        #[test]
276        fn test_decode_empty() {
277            assert!(decode(b"~>").unwrap().is_empty());
278        }
279
280        #[test]
281        fn test_decode_partial_group() {
282            let result = decode(b"9jqo^BlbD~>").unwrap();
283            assert_eq!(&result[..4], b"Man ");
284            assert_eq!(result.len(), 7);
285        }
286
287        #[test]
288        fn test_decode_single_trailing_char_invalid() {
289            assert!(decode(b"A~>").is_err());
290        }
291
292        #[test]
293        fn test_decode_invalid_char() {
294            assert!(decode(b"9jqo^{~>").is_err());
295        }
296
297        #[test]
298        fn test_decode_z_inside_group_error() {
299            assert!(decode(b"Az~>").is_err());
300        }
301
302        #[test]
303        fn test_decode_no_eod_marker() {
304            assert_eq!(decode(b"9jqo^").unwrap(), b"Man ");
305        }
306
307        #[test]
308        fn test_decode_known_string() {
309            assert_eq!(decode(b"87cURDe~>").unwrap(), b"Hello");
310        }
311    }
312}
313
314// ---------------------------------------------------------------------------
315// ascii_hex
316// ---------------------------------------------------------------------------
317
318pub mod ascii_hex {
319    //! ASCIIHexDecode filter.
320    //!
321    //! Note: PDFium's `BasicModule` does not include an ASCIIHex encoder;
322    //! this decode-only module is an rpdfium extension.
323
324    use crate::error::DecodeError;
325
326    /// Decode ASCIIHex-encoded data.
327    ///
328    /// - Pairs of hexadecimal digits decode to bytes.
329    /// - `>` marks end of data.
330    /// - Whitespace is ignored.
331    /// - An odd trailing nibble is zero-padded on the right (e.g., `A` → `0xA0`).
332    pub fn decode(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
333        let mut output = Vec::with_capacity(input.len() / 2);
334        let mut high_nibble: Option<u8> = None;
335
336        for &b in input {
337            if b == b'>' {
338                break;
339            }
340            if b == b' ' || b == b'\t' || b == b'\n' || b == b'\r' || b == b'\x0C' {
341                continue;
342            }
343
344            let nibble = hex_nibble(b)?;
345            match high_nibble.take() {
346                None => high_nibble = Some(nibble),
347                Some(high) => output.push((high << 4) | nibble),
348            }
349        }
350
351        // Odd trailing nibble: pad with 0 on the right
352        if let Some(high) = high_nibble {
353            output.push(high << 4);
354        }
355
356        Ok(output)
357    }
358
359    fn hex_nibble(b: u8) -> Result<u8, crate::error::DecodeError> {
360        match b {
361            b'0'..=b'9' => Ok(b - b'0'),
362            b'a'..=b'f' => Ok(b - b'a' + 10),
363            b'A'..=b'F' => Ok(b - b'A' + 10),
364            _ => Err(crate::error::DecodeError::InvalidInput(format!(
365                "ASCIIHex: invalid hex character 0x{b:02X}"
366            ))),
367        }
368    }
369
370    #[cfg(test)]
371    mod tests {
372        use super::*;
373
374        #[test]
375        fn test_decode_basic() {
376            assert_eq!(decode(b"48656C6C6F>").unwrap(), b"Hello");
377        }
378
379        #[test]
380        fn test_decode_lowercase() {
381            assert_eq!(decode(b"48656c6c6f>").unwrap(), b"Hello");
382        }
383
384        #[test]
385        fn test_decode_whitespace_ignored() {
386            assert_eq!(decode(b"48 65 6C\n6C 6F>").unwrap(), b"Hello");
387        }
388
389        #[test]
390        fn test_decode_odd_nibble() {
391            assert_eq!(decode(b"A>").unwrap(), vec![0xA0]);
392        }
393
394        #[test]
395        fn test_decode_empty() {
396            assert!(decode(b">").unwrap().is_empty());
397        }
398
399        #[test]
400        fn test_decode_no_eod_marker() {
401            assert_eq!(decode(b"4865").unwrap(), b"He");
402        }
403
404        #[test]
405        fn test_decode_invalid_char() {
406            assert!(decode(b"4G>").is_err());
407        }
408
409        #[test]
410        fn test_decode_all_zeros() {
411            assert_eq!(decode(b"0000>").unwrap(), vec![0, 0]);
412        }
413
414        #[test]
415        fn test_decode_all_ff() {
416            assert_eq!(decode(b"FFFF>").unwrap(), vec![0xFF, 0xFF]);
417        }
418
419        // ---------------------------------------------------------------
420        // Tests ported from upstream fpdf_parser_decode_unittest.cpp
421        // ---------------------------------------------------------------
422
423        /// Upstream: TEST(ParserDecodeTest, HexDecode) — empty src string
424        #[test]
425        fn test_parser_hex_decode_empty() {
426            assert!(decode(b"").unwrap().is_empty());
427        }
428
429        /// Upstream: TEST(ParserDecodeTest, HexDecode) — empty content with EOD
430        #[test]
431        fn test_parser_hex_decode_empty_content() {
432            assert!(decode(b">").unwrap().is_empty());
433        }
434
435        /// Upstream: TEST(ParserDecodeTest, HexDecode) — only whitespace
436        #[test]
437        fn test_parser_hex_decode_only_whitespace() {
438            assert!(decode(b"\t   \r\n>").unwrap().is_empty());
439        }
440
441        /// Upstream: TEST(ParserDecodeTest, HexDecode) — regular conversion
442        #[test]
443        fn test_parser_hex_decode_regular() {
444            assert_eq!(decode(b"12Ac>zzz").unwrap(), vec![0x12, 0xAC]);
445        }
446
447        /// Upstream: TEST(ParserDecodeTest, HexDecode) — skip whitespace
448        #[test]
449        fn test_parser_hex_decode_skip_whitespace() {
450            assert_eq!(
451                decode(b"12 Ac\t02\r\nBF>zzz>").unwrap(),
452                vec![0x12, 0xAC, 0x02, 0xBF]
453            );
454        }
455
456        /// Upstream: TEST(ParserDecodeTest, HexDecode) — non-multiple length
457        #[test]
458        fn test_parser_hex_decode_non_multiple() {
459            assert_eq!(decode(b"12A>zzz").unwrap(), vec![0x12, 0xA0]);
460        }
461
462        /// Upstream: TEST(ParserDecodeTest, HexDecode) — no ending mark
463        #[test]
464        fn test_parser_hex_decode_no_ending_mark() {
465            assert_eq!(
466                decode(b"12AcED3c3456").unwrap(),
467                vec![0x12, 0xAC, 0xED, 0x3C, 0x34, 0x56]
468            );
469        }
470    }
471}
472
473// ---------------------------------------------------------------------------
474// run_length
475// ---------------------------------------------------------------------------
476
477pub mod run_length {
478    //! RunLengthDecode / RunLengthEncode filter and scanline decoder.
479    //!
480    //! Mirrors PDFium's `BasicModule::RunLengthEncode()`,
481    //! `BasicModule::CreateRunLengthDecoder()`, and the internal
482    //! `RLScanlineDecoder`.
483
484    use crate::error::DecodeError;
485    use crate::scanline::ScanlineDecoder;
486
487    // -----------------------------------------------------------------------
488    // Batch decode
489    // -----------------------------------------------------------------------
490
491    /// Decode run-length encoded data.
492    ///
493    /// - Length byte 0–127: copy the next `length + 1` bytes literally.
494    /// - Length byte 129–255: repeat the next byte `257 - length` times.
495    /// - Length byte 128: end of data (EOD).
496    pub fn decode(input: &[u8]) -> Result<Vec<u8>, DecodeError> {
497        let mut output = Vec::new();
498        let mut i = 0;
499
500        while i < input.len() {
501            let length = input[i];
502            i += 1;
503
504            if length == 128 {
505                break; // EOD
506            }
507
508            if length <= 127 {
509                let count = usize::from(length) + 1;
510                if i + count > input.len() {
511                    return Err(DecodeError::InvalidInput(
512                        "RunLength: literal run extends past end of input".into(),
513                    ));
514                }
515                output.extend_from_slice(&input[i..i + count]);
516                i += count;
517            } else {
518                if i >= input.len() {
519                    return Err(DecodeError::InvalidInput(
520                        "RunLength: repeat run missing data byte".into(),
521                    ));
522                }
523                let count = 257 - usize::from(length);
524                let byte = input[i];
525                i += 1;
526                output.resize(output.len() + count, byte);
527            }
528        }
529
530        Ok(output)
531    }
532
533    // -----------------------------------------------------------------------
534    // Encode
535    // -----------------------------------------------------------------------
536
537    /// Encode data using run-length encoding.
538    ///
539    /// - Matched runs of 2–128 identical bytes → `[257 - run_len, byte]`.
540    /// - Mismatched runs of 1–128 distinct bytes → `[run_len - 1, b0, b1, ...]`.
541    /// - Terminated with the EOD marker `128`.
542    ///
543    /// Mirrors PDFium's `BasicModule::RunLengthEncode()`.
544    pub fn encode(input: &[u8]) -> Vec<u8> {
545        if input.is_empty() {
546            return vec![];
547        }
548        if input.len() == 1 {
549            return vec![0, input[0], 128];
550        }
551
552        // Worst case: 4 output bytes per 3 input (plus terminator).
553        let estimated = input.len().div_ceil(3) * 4 + 1;
554        let mut result = vec![0u8; estimated];
555        let mut wpos = 0usize;
556
557        let mut run_start = 0usize;
558        let mut run_end = 1usize;
559        let mut x = input[run_start];
560        let mut y = input[run_end];
561
562        while run_end < input.len() {
563            let max_len = 128usize.min(input.len() - run_start);
564
565            while x == y && run_end - run_start < max_len - 1 {
566                run_end += 1;
567                y = input[run_end];
568            }
569
570            if x == y {
571                run_end += 1;
572                if run_end < input.len() {
573                    y = input[run_end];
574                }
575            }
576
577            if run_end - run_start > 1 {
578                result[wpos] = (257 - (run_end - run_start)) as u8;
579                result[wpos + 1] = x;
580                wpos += 2;
581                x = y;
582                run_start = run_end;
583                run_end += 1;
584                if run_end < input.len() {
585                    y = input[run_end];
586                }
587                continue;
588            }
589
590            while x != y && run_end <= run_start + max_len {
591                result[wpos + (run_end - run_start)] = x;
592                x = y;
593                run_end += 1;
594                if run_end == input.len() {
595                    if run_end <= run_start + max_len {
596                        result[wpos + (run_end - run_start)] = x;
597                        run_end += 1;
598                    }
599                    break;
600                }
601                y = input[run_end];
602            }
603            result[wpos] = (run_end - run_start - 2) as u8;
604            wpos += run_end - run_start;
605            run_start = run_end - 1;
606        }
607
608        if run_start < input.len() {
609            result[wpos] = 0;
610            result[wpos + 1] = x;
611            wpos += 2;
612        }
613
614        result[wpos] = 128; // EOD
615        result.truncate(wpos + 1);
616        result
617    }
618
619    // -----------------------------------------------------------------------
620    // ScanlineDecoder (RLScanlineDecoder port)
621    // -----------------------------------------------------------------------
622
623    /// State of the current run-length operator between scanlines.
624    #[derive(Clone, Copy)]
625    enum RunOp {
626        Literal { remaining: usize },
627        Repeat { remaining: usize, byte: u8 },
628        Eod,
629        Init,
630    }
631
632    /// Scanline-based RunLength decoder.
633    ///
634    /// Yields one decoded scanline at a time, maintaining run-length state
635    /// across scanline boundaries. Mirrors PDFium's `RLScanlineDecoder`.
636    ///
637    /// Construct via [`create_decoder`].
638    pub struct RunLengthScanlineDecoder {
639        src: Vec<u8>,
640        width: u32,
641        height: u32,
642        comps: u8,
643        bpc: u8,
644        row_stride: usize,
645        scanline: Vec<u8>,
646        src_offset: usize,
647        op: RunOp,
648        lines_decoded: usize,
649    }
650
651    /// Create a scanline decoder for RunLength-encoded image data.
652    ///
653    /// Validates that `src` contains enough bytes to decode `width × height`
654    /// pixels, then returns a decoder ready to yield one scanline per call.
655    ///
656    /// Mirrors PDFium's `BasicModule::CreateRunLengthDecoder()`.
657    pub fn create_decoder(
658        src: &[u8],
659        width: u32,
660        height: u32,
661        comps: u8,
662        bpc: u8,
663    ) -> Result<RunLengthScanlineDecoder, DecodeError> {
664        let row_stride = (width as usize * comps as usize * bpc as usize).div_ceil(8);
665        let decoder = RunLengthScanlineDecoder {
666            src: src.to_vec(),
667            width,
668            height,
669            comps,
670            bpc,
671            row_stride,
672            scanline: vec![0u8; row_stride],
673            src_offset: 0,
674            op: RunOp::Init,
675            lines_decoded: 0,
676        };
677        if !decoder.check_dest_size() {
678            return Err(DecodeError::InvalidInput(
679                "RunLength: compressed data too small for image dimensions".into(),
680            ));
681        }
682        Ok(decoder)
683    }
684
685    impl RunLengthScanlineDecoder {
686        /// Pre-validate that `src` decodes to at least the required number of
687        /// bytes. Mirrors `RLScanlineDecoder::CheckDestSize()`.
688        fn check_dest_size(&self) -> bool {
689            let mut i = 0usize;
690            let mut dest: u64 = 0;
691            while i < self.src.len() {
692                let b = self.src[i];
693                if b < 128 {
694                    dest += b as u64 + 1;
695                    i += b as usize + 2;
696                } else if b > 128 {
697                    dest += 257 - b as u64;
698                    i += 2;
699                } else {
700                    break; // EOD
701                }
702            }
703            let required =
704                self.width as u64 * self.comps as u64 * self.bpc as u64 * self.height as u64;
705            dest * 8 >= required
706        }
707
708        fn read_next_op(&mut self) {
709            if self.src_offset >= self.src.len() {
710                self.op = RunOp::Eod;
711                return;
712            }
713            let b = self.src[self.src_offset];
714            self.src_offset += 1;
715            self.op = if b < 128 {
716                RunOp::Literal {
717                    remaining: b as usize + 1,
718                }
719            } else if b > 128 {
720                let data_byte = if self.src_offset < self.src.len() {
721                    self.src[self.src_offset]
722                } else {
723                    0
724                };
725                RunOp::Repeat {
726                    remaining: 257 - b as usize,
727                    byte: data_byte,
728                }
729            } else {
730                RunOp::Eod
731            };
732        }
733
734        /// Mirrors `RLScanlineDecoder::GetNextLine()`.
735        fn fill_scanline(&mut self) -> bool {
736            if matches!(self.op, RunOp::Init) {
737                self.read_next_op();
738            }
739            if matches!(self.op, RunOp::Eod) {
740                return false;
741            }
742
743            self.scanline.fill(0);
744            let mut col = 0usize;
745
746            loop {
747                let space = self.row_stride - col;
748                if space == 0 {
749                    break;
750                }
751
752                match self.op {
753                    RunOp::Literal { remaining } => {
754                        let src_left = self.src.len().saturating_sub(self.src_offset);
755                        let copy = remaining.min(space).min(src_left);
756                        let src_end = self.src_offset + copy;
757                        self.scanline[col..col + copy]
758                            .copy_from_slice(&self.src[self.src_offset..src_end]);
759                        col += copy;
760                        self.src_offset += copy;
761                        let new_remaining = remaining - copy;
762                        if new_remaining == 0 {
763                            self.read_next_op();
764                        } else {
765                            self.op = RunOp::Literal {
766                                remaining: new_remaining,
767                            };
768                            break;
769                        }
770                    }
771                    RunOp::Repeat { remaining, byte } => {
772                        let copy = remaining.min(space);
773                        self.scanline[col..col + copy].fill(byte);
774                        col += copy;
775                        let new_remaining = remaining - copy;
776                        if new_remaining == 0 {
777                            self.src_offset += 1;
778                            self.read_next_op();
779                        } else {
780                            self.op = RunOp::Repeat {
781                                remaining: new_remaining,
782                                byte,
783                            };
784                            break;
785                        }
786                    }
787                    RunOp::Eod | RunOp::Init => break,
788                }
789
790                if matches!(self.op, RunOp::Eod) {
791                    break;
792                }
793            }
794
795            true
796        }
797    }
798
799    impl ScanlineDecoder for RunLengthScanlineDecoder {
800        fn width(&self) -> u32 {
801            self.width
802        }
803        fn height(&self) -> u32 {
804            self.height
805        }
806        fn comps_count(&self) -> u8 {
807            self.comps
808        }
809        fn bpc(&self) -> u8 {
810            self.bpc
811        }
812        fn row_stride(&self) -> usize {
813            self.row_stride
814        }
815
816        fn decode_scanline(&mut self) -> Result<Option<&[u8]>, DecodeError> {
817            if self.lines_decoded >= self.height as usize {
818                return Ok(None);
819            }
820            if !self.fill_scanline() {
821                return Ok(None);
822            }
823            self.lines_decoded += 1;
824            Ok(Some(&self.scanline))
825        }
826
827        fn reset(&mut self) -> Result<(), DecodeError> {
828            self.scanline.fill(0);
829            self.src_offset = 0;
830            self.op = RunOp::Init;
831            self.lines_decoded = 0;
832            Ok(())
833        }
834
835        fn current_line(&self) -> Option<usize> {
836            if self.lines_decoded == 0 {
837                None
838            } else {
839                Some(self.lines_decoded - 1)
840            }
841        }
842    }
843
844    #[cfg(test)]
845    mod tests {
846        use super::*;
847
848        // --- decode ---
849
850        #[test]
851        fn test_decode_literal_run() {
852            assert_eq!(decode(&[2, b'A', b'B', b'C', 128]).unwrap(), b"ABC");
853        }
854
855        #[test]
856        fn test_decode_repeat_run() {
857            assert_eq!(decode(&[253, b'X', 128]).unwrap(), b"XXXX");
858        }
859
860        #[test]
861        fn test_decode_mixed_runs() {
862            assert_eq!(decode(&[1, b'A', b'B', 254, b'Z', 128]).unwrap(), b"ABZZZ");
863        }
864
865        #[test]
866        fn test_decode_eod_marker() {
867            assert!(decode(&[128]).unwrap().is_empty());
868        }
869
870        #[test]
871        fn test_decode_empty_input() {
872            assert!(decode(&[]).unwrap().is_empty());
873        }
874
875        #[test]
876        fn test_decode_truncated_literal() {
877            assert!(decode(&[1, b'A']).is_err());
878        }
879
880        #[test]
881        fn test_decode_truncated_repeat() {
882            assert!(decode(&[255]).is_err());
883        }
884
885        #[test]
886        fn test_decode_single_literal() {
887            assert_eq!(decode(&[0, b'Q', 128]).unwrap(), b"Q");
888        }
889
890        #[test]
891        fn test_decode_max_repeat() {
892            let result = decode(&[129, b'Y', 128]).unwrap();
893            assert_eq!(result.len(), 128);
894            assert!(result.iter().all(|&b| b == b'Y'));
895        }
896
897        // --- encode ---
898
899        #[test]
900        fn test_encode_empty() {
901            assert!(encode(&[]).is_empty());
902        }
903
904        #[test]
905        fn test_encode_single_byte() {
906            assert_eq!(encode(&[b'A']), &[0, b'A', 128]);
907        }
908
909        #[test]
910        fn test_encode_eod_terminator() {
911            assert_eq!(*encode(b"Hello").last().unwrap(), 128);
912        }
913
914        #[test]
915        fn test_encode_decode_roundtrip_literal() {
916            let original = b"ABCDEFGH";
917            assert_eq!(decode(&encode(original)).unwrap(), original);
918        }
919
920        #[test]
921        fn test_encode_decode_roundtrip_repeated() {
922            let original = vec![b'X'; 64];
923            let encoded = encode(&original);
924            assert!(encoded.len() < original.len());
925            assert_eq!(decode(&encoded).unwrap(), original);
926        }
927
928        #[test]
929        fn test_encode_decode_roundtrip_all_bytes() {
930            let original: Vec<u8> = (0u8..=255).collect();
931            assert_eq!(decode(&encode(&original)).unwrap(), original);
932        }
933
934        #[test]
935        fn test_encode_max_repeat_128() {
936            let original = vec![b'Z'; 128];
937            assert_eq!(decode(&encode(&original)).unwrap(), original);
938        }
939
940        // --- RunLengthScanlineDecoder ---
941
942        fn make_rl_image(rows: &[&[u8]]) -> Vec<u8> {
943            let mut encoded = Vec::new();
944            for row in rows {
945                for chunk in row.chunks(128) {
946                    encoded.push((chunk.len() - 1) as u8);
947                    encoded.extend_from_slice(chunk);
948                }
949            }
950            encoded.push(128);
951            encoded
952        }
953
954        #[test]
955        fn test_rl_scanline_basic() {
956            let src = make_rl_image(&[&[1u8, 2, 3], &[4u8, 5, 6]]);
957            let mut dec = create_decoder(&src, 3, 2, 1, 8).unwrap();
958
959            assert_eq!(dec.width(), 3);
960            assert_eq!(dec.height(), 2);
961            assert_eq!(dec.comps_count(), 1);
962            assert_eq!(dec.bpc(), 8);
963            assert_eq!(dec.row_stride(), 3);
964
965            assert_eq!(dec.decode_scanline().unwrap().unwrap(), &[1, 2, 3]);
966            assert_eq!(dec.current_line(), Some(0));
967            assert_eq!(dec.decode_scanline().unwrap().unwrap(), &[4, 5, 6]);
968            assert_eq!(dec.current_line(), Some(1));
969            assert!(dec.decode_scanline().unwrap().is_none());
970        }
971
972        #[test]
973        fn test_rl_scanline_repeat_run() {
974            let src = [253u8, b'A', 128];
975            let mut dec = create_decoder(&src, 4, 1, 1, 8).unwrap();
976            assert_eq!(
977                dec.decode_scanline().unwrap().unwrap(),
978                &[b'A', b'A', b'A', b'A']
979            );
980            assert!(dec.decode_scanline().unwrap().is_none());
981        }
982
983        #[test]
984        fn test_rl_scanline_reset() {
985            let src = make_rl_image(&[&[1u8, 2, 3, 4], &[1u8, 2, 3, 4]]);
986            let mut dec = create_decoder(&src, 4, 2, 1, 8).unwrap();
987            let first = dec.decode_scanline().unwrap().unwrap().to_vec();
988            dec.reset().unwrap();
989            assert_eq!(dec.current_line(), None);
990            assert_eq!(dec.decode_scanline().unwrap().unwrap(), first);
991        }
992
993        #[test]
994        fn test_rl_scanline_current_line_before_read() {
995            let src = make_rl_image(&[&[0u8, 0, 0]]);
996            assert_eq!(
997                create_decoder(&src, 3, 1, 1, 8).unwrap().current_line(),
998                None
999            );
1000        }
1001
1002        #[test]
1003        fn test_rl_scanline_too_small_returns_error() {
1004            let src = [0u8, 42, 128];
1005            assert!(create_decoder(&src, 2, 2, 1, 8).is_err());
1006        }
1007
1008        #[test]
1009        fn test_rl_scanline_run_spanning_rows() {
1010            let src = [251u8, b'X', 128]; // 257-251=6 × 'X', 3 bytes/row
1011            let mut dec = create_decoder(&src, 3, 2, 1, 8).unwrap();
1012            assert_eq!(dec.decode_scanline().unwrap().unwrap(), &[b'X', b'X', b'X']);
1013            assert_eq!(dec.decode_scanline().unwrap().unwrap(), &[b'X', b'X', b'X']);
1014            assert!(dec.decode_scanline().unwrap().is_none());
1015        }
1016    }
1017}
rpdfium_codec/basic/mod.rs

rpdfium_codec/basic/
mod.rs