pdf_create/
encoding.rs

1//! Text encodings
2
3use std::{
4    error::Error,
5    fmt,
6    io::{self, Write},
7};
8
/// Write `data` to `w` in the form expected by the *ASCII85Decode* filter.
///
/// Every full group of four input bytes is read as a big-endian `u32` and
/// emitted as five base-85 digits in the range `!`..=`u`; an all-zero group
/// is abbreviated to a single `z`. A trailing group of `n < 4` bytes is
/// zero-padded and only its first `n + 1` digits are kept. The output always
/// ends with the end-of-data marker `~>`.
///
/// A line feed is inserted after a full group once at least 75 bytes have
/// been written since the previous line break.
///
/// # Returns
///
/// The total number of bytes handed to `w`, including line feeds and the
/// end-of-data marker.
///
/// # Errors
///
/// Propagates the first error reported by `w`.
pub fn ascii_85_encode<W: Write>(data: &[u8], w: &mut W) -> io::Result<usize> {
    /// Number of output bytes after which a line feed is inserted.
    const LINE_WIDTH: usize = 75;
    /// End-of-data marker `~>`.
    const EOD: &[u8; 2] = b"~>";

    /// Base-85 digits of `value`, most significant first, shifted to start at `!`.
    fn base_85_digits(mut value: u32) -> [u8; 5] {
        let mut digits = [b'!'; 5];
        // Fill from the least significant digit backwards.
        for digit in digits.iter_mut().rev() {
            *digit += (value % 85) as u8;
            value /= 85;
        }
        digits
    }

    let mut written = 0;
    let mut next_break = LINE_WIDTH;

    let mut groups = data.chunks_exact(4);
    for group in groups.by_ref() {
        let mut quad = [0u8; 4];
        quad.copy_from_slice(group);
        let value = u32::from_be_bytes(quad);

        if value == 0 {
            // Four zero bytes collapse into the `z` shorthand.
            w.write_all(b"z")?;
            written += 1;
        } else {
            w.write_all(&base_85_digits(value))?;
            written += 5;
        }

        if written >= next_break {
            w.write_all(b"\n")?;
            written += 1;
            next_break = written + LINE_WIDTH;
        }
    }

    // Assemble the (possibly empty) partial group and the end-of-data marker
    // in one buffer so that they reach the writer in a single call.
    let tail = groups.remainder();
    let mut trailer = [0u8; 6];
    let mut len = 0;
    if !tail.is_empty() {
        let mut padded = [0u8; 4];
        padded[..tail.len()].copy_from_slice(tail);
        let digits = base_85_digits(u32::from_be_bytes(padded));
        // `n` leftover bytes are represented by the leading `n + 1` digits.
        len = tail.len() + 1;
        trailer[..len].copy_from_slice(&digits[..len]);
    }
    trailer[len..len + 2].copy_from_slice(EOD);
    len += 2;

    w.write_all(&trailer[..len])?;
    written += len;

    Ok(written)
}
70
/// Error returned when a character has no representation in PDFDocEncoding.
///
/// Wraps the offending character; the [`fmt::Display`] output reports its
/// code point as lowercase hexadecimal padded to at least four digits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PDFDocEncodingError(char);

impl Error for PDFDocEncodingError {}

impl fmt::Display for PDFDocEncodingError {
    /// Formats the error as `Codepoint U+xxxx is not valid in PDFDocEncoding`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "Codepoint U+{:04x} is not valid in PDFDocEncoding",
            u32::from(self.0)
        )
    }
}
85
86fn pdf_char_encode(chr: char) -> Result<u8, PDFDocEncodingError> {
87    match u32::from(chr) {
88        0x00..=0x17 | 0x20..=0x7E | 0xA1..=0xff => Ok(chr as u8),
89
90        0x02D8 => Ok(0x18),
91        0x02C7 => Ok(0x19),
92        0x02C6 => Ok(0x1A),
93        0x02D9 => Ok(0x1B),
94        0x02DD => Ok(0x1C),
95        0x02DB => Ok(0x1D),
96        0x02DA => Ok(0x1E),
97        0x02DC => Ok(0x1F),
98        0x2022 => Ok(0x80),
99        0x2020 => Ok(0x81),
100        0x2021 => Ok(0x82),
101        0x2026 => Ok(0x83),
102        0x2014 => Ok(0x84),
103        0x2013 => Ok(0x85),
104        0x0192 => Ok(0x86),
105        0x2044 => Ok(0x87),
106        0x2039 => Ok(0x88),
107        0x203A => Ok(0x89),
108        0x2212 => Ok(0x8A),
109        0x2030 => Ok(0x8B),
110        0x201E => Ok(0x8C),
111        0x201C => Ok(0x8D),
112        0x201D => Ok(0x8E),
113        0x2018 => Ok(0x8F),
114
115        0x2019 => Ok(0x90),
116        0x201A => Ok(0x91),
117        0x2122 => Ok(0x92),
118        0xFB01 => Ok(0x93),
119        0xFB02 => Ok(0x94),
120        0x0141 => Ok(0x95),
121        0x0152 => Ok(0x96),
122        0x0160 => Ok(0x97),
123        0x0178 => Ok(0x98),
124        0x017D => Ok(0x99),
125        0x0131 => Ok(0x9A),
126        0x0142 => Ok(0x9B),
127        0x0153 => Ok(0x9C),
128        0x0161 => Ok(0x9D),
129        0x017e => Ok(0x9E),
130
131        0x20AC => Ok(0xA0),
132
133        _ => Err(PDFDocEncodingError(chr)),
134    }
135}
136
137/// Encode a string as PDFDocEncoding
138pub fn pdf_doc_encode(input: &str) -> Result<Vec<u8>, PDFDocEncodingError> {
139    input.chars().map(pdf_char_encode).collect()
140}