Skip to main content

hayro_postscript/
string.rs

1mod ascii_85;
2pub(crate) mod ascii_hex;
3mod literal;
4
5use alloc::vec::Vec;
6
7use crate::error::{Error, Result};
8use crate::reader::Reader;
9
/// The still-encoded body of a string, tagged with the syntax used to decode it.
///
/// Every variant borrows the raw bytes; nothing is decoded until the owning
/// `String` is asked to do so.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum StringInner<'a> {
    /// Raw body decoded with the `literal` module.
    Literal(&'a [u8]),
    /// Raw body decoded with the `ascii_hex` module.
    Hex(&'a [u8]),
    /// Raw body decoded with the `ascii_85` module.
    Ascii85(&'a [u8]),
}
16
17/// A PostScript string object.
18#[derive(Debug, Clone, Copy, PartialEq, Eq)]
19pub struct String<'a> {
20    inner: StringInner<'a>,
21}
22
23impl<'a> String<'a> {
24    pub(crate) const fn from_literal(data: &'a [u8]) -> Self {
25        Self {
26            inner: StringInner::Literal(data),
27        }
28    }
29
30    pub(crate) const fn from_hex(data: &'a [u8]) -> Self {
31        Self {
32            inner: StringInner::Hex(data),
33        }
34    }
35
36    pub(crate) const fn from_ascii85(data: &'a [u8]) -> Self {
37        Self {
38            inner: StringInner::Ascii85(data),
39        }
40    }
41
42    /// Decode the string content into `out`, replacing any previous contents.
43    pub fn decode_into(&self, out: &mut Vec<u8>) -> Result<()> {
44        out.clear();
45        match self.inner {
46            StringInner::Literal(data) => literal::decode_into(data, out),
47            StringInner::Hex(data) => ascii_hex::decode_into(data, out),
48            StringInner::Ascii85(data) => ascii_85::decode_into(data, out),
49        }
50        .ok_or(Error::SyntaxError)
51    }
52
53    /// Decode the string content.
54    pub fn decode(&self) -> Result<Vec<u8>> {
55        let mut out = Vec::new();
56        self.decode_into(&mut out)?;
57        Ok(out)
58    }
59}
60
61pub(crate) fn parse_literal<'a>(r: &mut Reader<'a>) -> Option<&'a [u8]> {
62    let start = r.offset();
63    skip_literal(r)?;
64    let end = r.offset();
65    // Exclude outer parentheses.
66    r.range(start + 1..end - 1)
67}
68
69pub(crate) fn parse_hex<'a>(r: &mut Reader<'a>) -> Option<&'a [u8]> {
70    r.forward_tag(b"<")?;
71    let start = r.offset();
72    while let Some(b) = r.read_byte() {
73        if b == b'>' {
74            return r.range(start..r.offset() - 1);
75        }
76    }
77    None
78}
79
80pub(crate) fn parse_ascii85<'a>(r: &mut Reader<'a>) -> Option<&'a [u8]> {
81    r.forward_tag(b"<~")?;
82    let start = r.offset();
83    loop {
84        let b = r.read_byte()?;
85        if b == b'~' {
86            let end = r.offset() - 1;
87            r.forward_tag(b">")?;
88            return r.range(start..end);
89        }
90    }
91}
92
93fn skip_literal(r: &mut Reader<'_>) -> Option<()> {
94    r.forward_tag(b"(")?;
95    let mut depth = 1_u32;
96
97    while depth > 0 {
98        let byte = r.read_byte()?;
99        match byte {
100            b'\\' => {
101                let _ = r.read_byte()?;
102            }
103            b'(' => depth += 1,
104            b')' => depth -= 1,
105            _ => {}
106        }
107    }
108
109    Some(())
110}
111
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // Literal strings
    // ---------------------------------------------------------------------

    /// Parse `input` as a literal string and decode its body.
    fn decode_literal(input: &[u8]) -> Result<Vec<u8>> {
        let mut reader = Reader::new(input);
        match parse_literal(&mut reader) {
            Some(body) => String::from_literal(body).decode(),
            None => Err(Error::SyntaxError),
        }
    }

    /// Assert that the literal string `input` decodes to `expected`.
    #[track_caller]
    fn check_literal(input: &[u8], expected: &[u8]) {
        assert_eq!(decode_literal(input).unwrap(), expected);
    }

    #[test]
    fn literal_empty() {
        check_literal(b"()", b"");
    }

    #[test]
    fn literal_simple() {
        check_literal(b"(Hello)", b"Hello");
    }

    #[test]
    fn literal_nested_parens() {
        check_literal(b"(Hi (()) there)", b"Hi (()) there");
    }

    #[test]
    fn literal_escape_n() {
        check_literal(b"(a\\nb)", b"a\nb");
    }

    #[test]
    fn literal_escape_r() {
        check_literal(b"(a\\rb)", b"a\rb");
    }

    #[test]
    fn literal_escape_t() {
        check_literal(b"(a\\tb)", b"a\tb");
    }

    #[test]
    fn literal_escape_b() {
        check_literal(b"(a\\bb)", &[b'a', 0x08, b'b']);
    }

    #[test]
    fn literal_escape_f() {
        check_literal(b"(a\\fb)", &[b'a', 0x0C, b'b']);
    }

    #[test]
    fn literal_escape_backslash() {
        check_literal(b"(a\\\\b)", b"a\\b");
    }

    #[test]
    fn literal_escape_parens() {
        check_literal(b"(Hi \\()", b"Hi (");
    }

    #[test]
    fn literal_octal_three_digits() {
        check_literal(b"(\\053)", b"+");
    }

    #[test]
    fn literal_octal_two_digits() {
        check_literal(b"(\\36)", b"\x1e");
    }

    #[test]
    fn literal_octal_one_digit() {
        check_literal(b"(\\3)", b"\x03");
    }

    #[test]
    fn literal_line_continuation_lf() {
        check_literal(b"(Hi \\\nthere)", b"Hi there");
    }

    #[test]
    fn literal_line_continuation_cr() {
        check_literal(b"(Hi \\\rthere)", b"Hi there");
    }

    #[test]
    fn literal_line_continuation_crlf() {
        check_literal(b"(Hi \\\r\nthere)", b"Hi there");
    }

    #[test]
    fn literal_bare_eol_lf() {
        check_literal(b"(a\nb)", b"a\nb");
    }

    #[test]
    fn literal_bare_eol_cr() {
        check_literal(b"(a\rb)", b"a\nb");
    }

    #[test]
    fn literal_bare_eol_crlf() {
        check_literal(b"(a\r\nb)", b"a\nb");
    }

    // ---------------------------------------------------------------------
    // Hexadecimal strings
    // ---------------------------------------------------------------------

    /// Parse `input` as a hexadecimal string and decode its body.
    fn decode_hex(input: &[u8]) -> Result<Vec<u8>> {
        let mut reader = Reader::new(input);
        match parse_hex(&mut reader) {
            Some(body) => String::from_hex(body).decode(),
            None => Err(Error::SyntaxError),
        }
    }

    /// Assert that the hexadecimal string `input` decodes to `expected`.
    #[track_caller]
    fn check_hex(input: &[u8], expected: &[u8]) {
        assert_eq!(decode_hex(input).unwrap(), expected);
    }

    #[test]
    fn hex_simple() {
        check_hex(b"<48656C6C6F>", b"Hello");
    }

    #[test]
    fn hex_with_whitespace() {
        check_hex(b"<48 65 6C 6C 6F>", b"Hello");
    }

    #[test]
    fn hex_odd_nibble() {
        check_hex(b"<ABC>", &[0xAB, 0xC0]);
    }

    #[test]
    fn hex_empty() {
        check_hex(b"<>", b"");
    }

    #[test]
    fn hex_lowercase() {
        check_hex(b"<abcd>", &[0xAB, 0xCD]);
    }

    #[test]
    fn hex_mixed_case() {
        check_hex(b"<aB3E>", &[0xAB, 0x3E]);
    }

    // ---------------------------------------------------------------------
    // ASCII85 strings
    // ---------------------------------------------------------------------

    /// Parse `input` as an ASCII85 string and decode its body.
    fn decode_a85(input: &[u8]) -> Result<Vec<u8>> {
        let mut reader = Reader::new(input);
        match parse_ascii85(&mut reader) {
            Some(body) => String::from_ascii85(body).decode(),
            None => Err(Error::SyntaxError),
        }
    }

    /// Assert that the ASCII85 string `input` decodes to `expected`.
    #[track_caller]
    fn check_a85(input: &[u8], expected: &[u8]) {
        assert_eq!(decode_a85(input).unwrap(), expected);
    }

    #[test]
    fn ascii85_simple() {
        // "Hello" in ASCII85 is "87cURDZ"
        check_a85(b"<~87cURDZ~>", b"Hello");
    }

    #[test]
    fn ascii85_empty() {
        check_a85(b"<~~>", b"");
    }

    #[test]
    fn ascii85_z_shorthand() {
        check_a85(b"<~z~>", &[0, 0, 0, 0]);
    }

    #[test]
    fn ascii85_partial_group() {
        // Only the decoded length is pinned here, not the byte value.
        let decoded = decode_a85(b"<~87~>").unwrap();
        assert_eq!(decoded.len(), 1);
    }

    #[test]
    fn ascii85_with_whitespace() {
        check_a85(b"<~87cU RDZ~>", b"Hello");
    }
}