Skip to main content

pdf_syntax/object/
name.rs

1//! Names.
2
3use crate::filter::ascii_hex::decode_hex_digit;
4use crate::object::Object;
5use crate::object::macros::object;
6use crate::reader::Reader;
7use crate::reader::{Readable, ReaderContext, Skippable};
8use crate::trivia::is_regular_character;
9use core::borrow::Borrow;
10use core::fmt::{self, Debug, Formatter};
11use core::hash::Hash;
12use core::ops::Deref;
13use smallvec::SmallVec;
14
/// Byte storage backing a [`Name`]: a `SmallVec` parameterised with a 23-byte array.
// NOTE(review): presumably 23 was picked so that typical names stay inline without a
// heap allocation — confirm against the `smallvec` layout before changing it.
type NameInner = SmallVec<[u8; 23]>;
16
/// A PDF name object.
///
/// Wraps the bytes of the name. When built through [`Name::new`], every `#xx`
/// escape (a `#` followed by two hex digits) has already been decoded, and the
/// reader implementation strips the leading solidus before constructing the value.
///
/// Equality, ordering and hashing are derived from the wrapped byte storage.
#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Name(NameInner);
20
21impl Deref for Name {
22    type Target = [u8];
23
24    fn deref(&self) -> &Self::Target {
25        &self.0
26    }
27}
28
29impl AsRef<[u8]> for Name {
30    fn as_ref(&self) -> &[u8] {
31        &self.0
32    }
33}
34
35impl Borrow<[u8]> for Name {
36    fn borrow(&self) -> &[u8] {
37        &self.0
38    }
39}
40
41impl Name {
42    /// Create a new name from a sequence of bytes.
43    pub fn new(data: &[u8]) -> Self {
44        if !data.contains(&b'#') {
45            Self(SmallVec::from_slice(data))
46        } else {
47            let mut result = SmallVec::new();
48            let mut r = Reader::new(data);
49
50            while let Some(b) = r.read_byte() {
51                if b == b'#' {
52                    // Per PDF 1.2+ spec, `#` introduces a 2-hex-digit escape. Legacy
53                    // PDF 1.0/1.1 files (and some malformed 1.2+ files) use `#` as a
54                    // literal character. Match the lenient skip_name_like behaviour:
55                    // if the next 2 bytes aren't both hex digits, treat `#` as literal.
56                    match r.peek_bytes(2) {
57                        Some(hex) if hex[0].is_ascii_hexdigit() && hex[1].is_ascii_hexdigit() => {
58                            let hex = r.read_bytes(2).unwrap();
59                            result.push(
60                                decode_hex_digit(hex[0]).unwrap() << 4
61                                    | decode_hex_digit(hex[1]).unwrap(),
62                            );
63                        }
64                        _ => {
65                            result.push(b'#');
66                        }
67                    }
68                } else {
69                    result.push(b);
70                }
71            }
72
73            Self(result)
74        }
75    }
76
77    /// Return a string representation of the name.
78    ///
79    /// Returns a placeholder in case the name is not UTF-8 encoded.
80    pub fn as_str(&self) -> &str {
81        core::str::from_utf8(&self.0).unwrap_or("{non-ascii key}")
82    }
83}
84
85impl Debug for Name {
86    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
87        match core::str::from_utf8(&self.0) {
88            Ok(s) => <str as Debug>::fmt(s, f),
89            Err(_) => <[u8] as Debug>::fmt(&self.0, f),
90        }
91    }
92}
93
// Invokes the crate-internal `object!` macro for `Name`.
// NOTE(review): presumably this wires `Name` into the generic `Object` machinery —
// see `crate::object::macros` to confirm what is generated.
object!(Name, Name);
95
96impl Skippable for Name {
97    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
98        skip_name_like(r, true).map(|_| ())
99    }
100}
101
102impl Readable<'_> for Name {
103    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
104        let start = r.offset();
105        skip_name_like(r, true)?;
106        let end = r.offset();
107
108        // Exclude leading solidus.
109        let data = r.range(start + 1..end)?;
110        Some(Self::new(data))
111    }
112}
113
114// This method is shared by `Name` and the parser for content stream operators (which behave like
115// names, except that they aren't preceded by a solidus.
116pub(crate) fn skip_name_like(r: &mut Reader<'_>, solidus: bool) -> Option<()> {
117    if solidus {
118        r.forward_tag(b"/")?;
119    }
120
121    let old = r.offset();
122
123    while let Some(b) = r.eat(is_regular_character) {
124        if b == b'#' {
125            // Per PDF 1.2+ spec, `#` introduces a 2-hex-digit escape. Legacy PDF 1.0/1.1
126            // files (header `%PDF-1.0`) predate this convention and use `#` as a literal
127            // regular character (e.g. QDF output uses `/Im#1`, `/Im#` as XObject names).
128            // Be lenient: consume the escape only when both following bytes are hex
129            // digits; otherwise accept `#` as a literal. Matches MuPDF/pdfium behaviour.
130            if let Some(hex) = r.peek_bytes(2)
131                && hex[0].is_ascii_hexdigit()
132                && hex[1].is_ascii_hexdigit()
133            {
134                r.read_byte();
135                r.read_byte();
136            }
137        }
138    }
139
140    if !solidus && old == r.offset() {
141        return None;
142    }
143
144    Some(())
145}
146
#[cfg(test)]
mod tests {
    use crate::object::Name;
    use crate::reader::Reader;
    use crate::reader::ReaderExt;
    use std::ops::Deref;

    /// Parses `input` as a single name without any reader context.
    fn parse(input: &str) -> Option<Name> {
        Reader::new(input.as_bytes()).read_without_context::<Name>()
    }

    /// Asserts that `input` parses to a name whose bytes equal `expected`.
    #[track_caller]
    fn assert_name(input: &str, expected: &[u8]) {
        let name = parse(input).unwrap();
        assert_eq!(name.deref(), expected);
    }

    #[test]
    fn name_1() {
        // A lone solidus is a valid, empty name.
        assert_name("/", b"");
    }

    #[test]
    fn name_2() {
        // Without the leading solidus nothing is parsed.
        assert!(parse("dfg").is_none());
    }

    #[test]
    fn name_3() {
        // `#` without two trailing hex digits is accepted leniently as a literal
        // character (PDF 1.0/1.1 compatibility — see skip_name_like rationale).
        assert_name("/AB#FG", b"AB#FG");
    }

    #[test]
    fn name_18_trailing_hash() {
        // QDF-style names used in PDF 1.0 documents (0555.pdf):
        // `#` at end of name is literal when no hex digits follow.
        assert_name("/Im# ", b"Im#");
    }

    #[test]
    fn name_19_single_digit_after_hash() {
        // `/Im#1 ` — one hex digit then whitespace: `#` is literal, `1` is part of name.
        assert_name("/Im#1 ", b"Im#1");
    }

    #[test]
    fn name_4() {
        assert_name("/Name1", b"Name1");
    }

    #[test]
    fn name_5() {
        assert_name("/ASomewhatLongerName", b"ASomewhatLongerName");
    }

    #[test]
    fn name_6() {
        assert_name(
            "/A;Name_With-Various***Characters?",
            b"A;Name_With-Various***Characters?",
        );
    }

    #[test]
    fn name_7() {
        assert_name("/1.2", b"1.2");
    }

    #[test]
    fn name_8() {
        assert_name("/$$", b"$$");
    }

    #[test]
    fn name_9() {
        assert_name("/@pattern", b"@pattern");
    }

    #[test]
    fn name_10() {
        assert_name("/.notdef", b".notdef");
    }

    #[test]
    fn name_11() {
        assert_name("/lime#20Green", b"lime Green");
    }

    #[test]
    fn name_12() {
        assert_name("/paired#28#29parentheses", b"paired()parentheses");
    }

    #[test]
    fn name_13() {
        assert_name("/The_Key_of_F#23_Minor", b"The_Key_of_F#_Minor");
    }

    #[test]
    fn name_14() {
        assert_name("/A#42", b"AB");
    }

    #[test]
    fn name_15() {
        // Lowercase hex digits are decoded.
        assert_name("/A#3b", b"A;");
    }

    #[test]
    fn name_16() {
        // Uppercase hex digits are decoded as well.
        assert_name("/A#3B", b"A;");
    }

    #[test]
    fn name_17() {
        // Trailing whitespace terminates the name.
        assert_name("/k1  ", b"k1");
    }
}