Skip to main content

pdf_syntax/object/
name.rs

1//! Names.
2
3use crate::filter::ascii_hex::decode_hex_digit;
4use crate::object::Object;
5use crate::object::macros::object;
6use crate::reader::Reader;
7use crate::reader::{Readable, ReaderContext, Skippable};
8use crate::trivia::is_regular_character;
9use core::borrow::Borrow;
10use core::fmt::{self, Debug, Formatter};
11use core::hash::Hash;
12use core::ops::Deref;
13use smallvec::SmallVec;
14
/// Backing storage for [`Name`]: a `SmallVec` of bytes with an inline capacity of 23.
type NameInner = SmallVec<[u8; 23]>;

/// A PDF name object.
///
/// Wraps the name's bytes as built by [`Name::new`], i.e. with `#XX` hex escapes
/// already decoded. Equality, ordering and hashing are derived and therefore operate
/// on those stored bytes.
#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Name(NameInner);
20
21impl Deref for Name {
22    type Target = [u8];
23
24    fn deref(&self) -> &Self::Target {
25        &self.0
26    }
27}
28
29impl AsRef<[u8]> for Name {
30    fn as_ref(&self) -> &[u8] {
31        &self.0
32    }
33}
34
35impl Borrow<[u8]> for Name {
36    fn borrow(&self) -> &[u8] {
37        &self.0
38    }
39}
40
41impl Name {
42    /// Create a new name from a sequence of bytes.
43    pub fn new(data: &[u8]) -> Self {
44        if !data.contains(&b'#') {
45            Self(SmallVec::from_slice(data))
46        } else {
47            let mut result = SmallVec::new();
48            let mut r = Reader::new(data);
49
50            while let Some(b) = r.read_byte() {
51                if b == b'#' {
52                    // Per PDF 1.2+ spec, `#` introduces a 2-hex-digit escape. Legacy
53                    // PDF 1.0/1.1 files (and some malformed 1.2+ files) use `#` as a
54                    // literal character. Match the lenient skip_name_like behaviour:
55                    // if the next 2 bytes aren't both hex digits, treat `#` as literal.
56                    match r.peek_bytes(2) {
57                        Some(hex)
58                            if hex[0].is_ascii_hexdigit() && hex[1].is_ascii_hexdigit() =>
59                        {
60                            let hex = r.read_bytes(2).unwrap();
61                            result.push(
62                                decode_hex_digit(hex[0]).unwrap() << 4
63                                    | decode_hex_digit(hex[1]).unwrap(),
64                            );
65                        }
66                        _ => {
67                            result.push(b'#');
68                        }
69                    }
70                } else {
71                    result.push(b);
72                }
73            }
74
75            Self(result)
76        }
77    }
78
79    /// Return a string representation of the name.
80    ///
81    /// Returns a placeholder in case the name is not UTF-8 encoded.
82    pub fn as_str(&self) -> &str {
83        core::str::from_utf8(&self.0).unwrap_or("{non-ascii key}")
84    }
85}
86
87impl Debug for Name {
88    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
89        match core::str::from_utf8(&self.0) {
90            Ok(s) => <str as Debug>::fmt(s, f),
91            Err(_) => <[u8] as Debug>::fmt(&self.0, f),
92        }
93    }
94}
95
// Registers `Name` through the crate's `object!` macro (imported from
// `crate::object::macros`).
// NOTE(review): the macro expansion is not visible in this file — confirm what each of
// the two `Name` arguments stands for.
object!(Name, Name);
97
98impl Skippable for Name {
99    fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
100        skip_name_like(r, true).map(|_| ())
101    }
102}
103
104impl Readable<'_> for Name {
105    fn read(r: &mut Reader<'_>, _: &ReaderContext<'_>) -> Option<Self> {
106        let start = r.offset();
107        skip_name_like(r, true)?;
108        let end = r.offset();
109
110        // Exclude leading solidus.
111        let data = r.range(start + 1..end)?;
112        Some(Self::new(data))
113    }
114}
115
116// This method is shared by `Name` and the parser for content stream operators (which behave like
117// names, except that they aren't preceded by a solidus.
118pub(crate) fn skip_name_like(r: &mut Reader<'_>, solidus: bool) -> Option<()> {
119    if solidus {
120        r.forward_tag(b"/")?;
121    }
122
123    let old = r.offset();
124
125    while let Some(b) = r.eat(is_regular_character) {
126        if b == b'#' {
127            // Per PDF 1.2+ spec, `#` introduces a 2-hex-digit escape. Legacy PDF 1.0/1.1
128            // files (header `%PDF-1.0`) predate this convention and use `#` as a literal
129            // regular character (e.g. QDF output uses `/Im#1`, `/Im#` as XObject names).
130            // Be lenient: consume the escape only when both following bytes are hex
131            // digits; otherwise accept `#` as a literal. Matches MuPDF/pdfium behaviour.
132            if let Some(hex) = r.peek_bytes(2)
133                && hex[0].is_ascii_hexdigit()
134                && hex[1].is_ascii_hexdigit()
135            {
136                r.read_byte();
137                r.read_byte();
138            }
139        }
140    }
141
142    if !solidus && old == r.offset() {
143        return None;
144    }
145
146    Some(())
147}
148
#[cfg(test)]
mod tests {
    use crate::object::Name;
    use crate::reader::Reader;
    use crate::reader::ReaderExt;

    /// Try to read a single `Name` from the start of `input`.
    fn read_name(input: &str) -> Option<Name> {
        Reader::new(input.as_bytes()).read_without_context::<Name>()
    }

    /// Assert that `input` parses as a name whose decoded bytes equal `expected`.
    #[track_caller]
    fn assert_name(input: &str, expected: &[u8]) {
        let name = read_name(input).unwrap();
        assert_eq!(&*name, expected);
    }

    #[test]
    fn name_1() {
        assert_name("/", b"");
    }

    #[test]
    fn name_2() {
        assert!(read_name("dfg").is_none());
    }

    #[test]
    fn name_3() {
        // `#` without two trailing hex digits is accepted leniently as a literal
        // character (PDF 1.0/1.1 compatibility — see skip_name_like rationale).
        assert_name("/AB#FG", b"AB#FG");
    }

    #[test]
    fn name_18_trailing_hash() {
        // QDF-style names used in PDF 1.0 documents (0555.pdf):
        // `#` at end of name is literal when no hex digits follow.
        assert_name("/Im# ", b"Im#");
    }

    #[test]
    fn name_19_single_digit_after_hash() {
        // `/Im#1 ` — one hex digit then whitespace: `#` is literal, `1` is part of name.
        assert_name("/Im#1 ", b"Im#1");
    }

    #[test]
    fn name_4() {
        assert_name("/Name1", b"Name1");
    }

    #[test]
    fn name_5() {
        assert_name("/ASomewhatLongerName", b"ASomewhatLongerName");
    }

    #[test]
    fn name_6() {
        assert_name(
            "/A;Name_With-Various***Characters?",
            b"A;Name_With-Various***Characters?",
        );
    }

    #[test]
    fn name_7() {
        assert_name("/1.2", b"1.2");
    }

    #[test]
    fn name_8() {
        assert_name("/$$", b"$$");
    }

    #[test]
    fn name_9() {
        assert_name("/@pattern", b"@pattern");
    }

    #[test]
    fn name_10() {
        assert_name("/.notdef", b".notdef");
    }

    #[test]
    fn name_11() {
        assert_name("/lime#20Green", b"lime Green");
    }

    #[test]
    fn name_12() {
        assert_name("/paired#28#29parentheses", b"paired()parentheses");
    }

    #[test]
    fn name_13() {
        assert_name("/The_Key_of_F#23_Minor", b"The_Key_of_F#_Minor");
    }

    #[test]
    fn name_14() {
        assert_name("/A#42", b"AB");
    }

    #[test]
    fn name_15() {
        assert_name("/A#3b", b"A;");
    }

    #[test]
    fn name_16() {
        assert_name("/A#3B", b"A;");
    }

    #[test]
    fn name_17() {
        assert_name("/k1  ", b"k1");
    }
}