Skip to main content

pdf_lib_rs/core/objects/
pdf_name.rs

1use std::fmt;
2use crate::core::syntax::is_regular_char;
3use crate::utils::{copy_string_into_buffer, to_hex_string};
4use super::pdf_object::PdfObjectTrait;
5
/// Returns `true` when `b` is an ASCII digit (`0`-`9`) or one of the uppercase
/// letters `A`-`F`. Lowercase `a`-`f` are rejected.
fn is_uppercase_hex(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'F')
}
10
11/// Decode hex codes in a PDF name (e.g., "#20" → " ").
12/// Only decodes `#XX` where XX are uppercase hex digits [0-9A-F],
13/// matching pdf-lib's regex: `/#([\dABCDEF]{2})/g`.
14fn decode_name(name: &str) -> String {
15    let mut result = String::new();
16    let bytes = name.as_bytes();
17    let mut i = 0;
18    while i < bytes.len() {
19        if bytes[i] == b'#'
20            && i + 2 < bytes.len()
21            && is_uppercase_hex(bytes[i + 1])
22            && is_uppercase_hex(bytes[i + 2])
23        {
24            let hex = &name[i + 1..i + 3];
25            if let Ok(byte) = u8::from_str_radix(hex, 16) {
26                result.push(byte as char);
27                i += 3;
28                continue;
29            }
30        }
31        result.push(bytes[i] as char);
32        i += 1;
33    }
34    result
35}
36
/// A PDF Name object such as `/Type` or `/Page`.
///
/// Only the serialized form is stored, and equality, hashing and cloning are
/// all derived from that string. Two names built from the same input through
/// `PdfName::of` therefore compare equal; each call produces its own value
/// rather than handing out a shared instance. `#XX` escapes in the input
/// (like `#20` for a space) are resolved when the name is built.
#[derive(Clone, Hash, PartialEq, Eq)]
pub struct PdfName {
    /// Serialized name with the leading slash included, e.g. "/Foo#20Bar".
    encoded_name: String,
}
46
47impl PdfName {
48    /// Create a PdfName from a raw name string (without leading slash).
49    /// Hex codes like `#20` are decoded.
50    pub fn of(name: &str) -> Self {
51        let decoded_value = decode_name(name);
52
53        let mut encoded_name = String::from("/");
54        for ch in decoded_value.chars() {
55            let code = ch as u8;
56            if is_regular_char(code) {
57                encoded_name.push(ch);
58            } else {
59                encoded_name.push('#');
60                encoded_name.push_str(&to_hex_string(code));
61            }
62        }
63
64        PdfName { encoded_name }
65    }
66
67    /// Get the encoded name string (with leading slash).
68    pub fn as_string(&self) -> &str {
69        &self.encoded_name
70    }
71
72    /// Decode the name to raw bytes (without the leading slash).
73    pub fn as_bytes_decoded(&self) -> Vec<u8> {
74        let mut bytes = Vec::new();
75        let chars: Vec<char> = self.encoded_name[1..].chars().collect(); // skip leading /
76        let mut i = 0;
77        while i < chars.len() {
78            if chars[i] == '#' && i + 2 < chars.len() {
79                let hex: String = [chars[i + 1], chars[i + 2]].iter().collect();
80                if let Ok(byte) = u8::from_str_radix(&hex, 16) {
81                    bytes.push(byte);
82                    i += 3;
83                    continue;
84                }
85            }
86            bytes.push(chars[i] as u8);
87            i += 1;
88        }
89        bytes
90    }
91
92    /// Decode the name to a text string.
93    pub fn decode_text(&self) -> String {
94        let bytes = self.as_bytes_decoded();
95        String::from_utf8_lossy(&bytes).to_string()
96    }
97
98    // Common PDF name constants
99    pub fn length() -> Self { Self::of("Length") }
100    pub fn flate_decode() -> Self { Self::of("FlateDecode") }
101    pub fn resources() -> Self { Self::of("Resources") }
102    pub fn font() -> Self { Self::of("Font") }
103    pub fn x_object() -> Self { Self::of("XObject") }
104    pub fn contents() -> Self { Self::of("Contents") }
105    pub fn r#type() -> Self { Self::of("Type") }
106    pub fn parent() -> Self { Self::of("Parent") }
107    pub fn media_box() -> Self { Self::of("MediaBox") }
108    pub fn page() -> Self { Self::of("Page") }
109    pub fn annots() -> Self { Self::of("Annots") }
110    pub fn rotate() -> Self { Self::of("Rotate") }
111    pub fn title() -> Self { Self::of("Title") }
112    pub fn author() -> Self { Self::of("Author") }
113    pub fn subject() -> Self { Self::of("Subject") }
114    pub fn creator() -> Self { Self::of("Creator") }
115    pub fn keywords() -> Self { Self::of("Keywords") }
116    pub fn producer() -> Self { Self::of("Producer") }
117    pub fn creation_date() -> Self { Self::of("CreationDate") }
118    pub fn mod_date() -> Self { Self::of("ModDate") }
119}
120
121impl fmt::Display for PdfName {
122    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
123        write!(f, "{}", self.encoded_name)
124    }
125}
126
127impl fmt::Debug for PdfName {
128    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129        write!(f, "PdfName({})", self.encoded_name)
130    }
131}
132
133impl PdfObjectTrait for PdfName {
134    fn size_in_bytes(&self) -> usize {
135        self.encoded_name.len()
136    }
137
138    fn copy_bytes_into(&self, buffer: &mut [u8], offset: usize) -> usize {
139        copy_string_into_buffer(&self.encoded_name, buffer, offset)
140    }
141}
142
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::typed_array_for;

    /// Raw inputs shared by the construction and equality tests.
    const SAMPLE_INPUTS: [&str; 3] = [
        "foobar",
        "A;Name_With-***Characters?",
        "paired#28#29parentheses",
    ];

    /// Checks that every `(input, expected)` pair serializes to `expected`.
    fn assert_serialized(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(PdfName::of(input).to_string(), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn can_be_constructed() {
        for input in SAMPLE_INPUTS.iter() {
            let _ = PdfName::of(input);
        }
    }

    #[test]
    fn returns_equal_value_for_same_input() {
        for input in SAMPLE_INPUTS.iter() {
            assert_eq!(PdfName::of(input), PdfName::of(input));
        }
    }

    #[test]
    fn decodes_hex_codes_in_values() {
        // (escaped spelling, plain spelling) — both must build the same name.
        let equivalents = [
            ("Lime#20Green", "Lime Green"),
            ("paired#28#29parentheses", "paired()parentheses"),
            ("The_Key_of_F#23_Minor", "The_Key_of_F#_Minor"),
            ("A#42", "AB"),
            ("Identity#2DH", "Identity-H"),
            ("#40", "@"),
            ("#41", "A"),
            ("#42", "B"),
            ("#43", "C"),
            ("#44", "D"),
            ("#45", "E"),
            ("#46", "F"),
            ("#47", "G"),
            ("#48", "H"),
            ("#49", "I"),
            ("#4A", "J"),
            ("#4B", "K"),
            ("#4C", "L"),
            ("#4D", "M"),
            ("#4E", "N"),
            ("#4F", "O"),
        ];
        for &(escaped, plain) in equivalents.iter() {
            assert_eq!(PdfName::of(escaped), PdfName::of(plain), "escaped: {:?}", escaped);
        }
    }

    #[test]
    fn encodes_hashes_whitespace_and_delimiters_when_serialized() {
        assert_serialized(&[
            // hash
            ("Foo#", "/Foo#23"),
            // whitespace
            ("Foo\0", "/Foo#00"),
            ("Foo\t", "/Foo#09"),
            ("Foo\n", "/Foo#0A"),
            ("Foo\x0C", "/Foo#0C"),
            ("Foo\r", "/Foo#0D"),
            ("Foo ", "/Foo#20"),
            // delimiters
            ("Foo(", "/Foo#28"),
            ("Foo)", "/Foo#29"),
            ("Foo<", "/Foo#3C"),
            ("Foo>", "/Foo#3E"),
            ("Foo[", "/Foo#5B"),
            ("Foo]", "/Foo#5D"),
            ("Foo{", "/Foo#7B"),
            ("Foo}", "/Foo#7D"),
            ("Foo/", "/Foo#2F"),
            ("Foo%", "/Foo#25"),
        ]);
    }

    #[test]
    fn can_be_converted_to_string() {
        assert_serialized(&[
            ("foobar", "/foobar"),
            ("Lime Green", "/Lime#20Green"),
            ("\0\t\n\x0C\r ", "/#00#09#0A#0C#0D#20"),
            ("Foo#Bar", "/Foo#23Bar"),
            ("paired()parentheses", "/paired#28#29parentheses"),
            // "#23" is decoded to "#" on input and re-encoded on output.
            ("The_Key_of_F#23_Minor", "/The_Key_of_F#23_Minor"),
            ("A#42", "/AB"),
        ]);
    }

    #[test]
    fn can_provide_size_in_bytes() {
        let cases: [(&str, usize); 7] = [
            ("foobar", 7),
            ("Lime Green", 13),
            ("\0\t\n\x0C\r ", 19),
            ("Foo#Bar", 10),
            ("paired()parentheses", 24),
            ("The_Key_of_F#23_Minor", 22),
            ("A#42", 3),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(PdfName::of(input).size_in_bytes(), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn can_be_serialized() {
        // (input, buffer length, write offset, bytes written, final buffer)
        let cases: [(&str, usize, usize, usize, &str); 3] = [
            ("\0\t\n\x0C\r ", 23, 3, 19, "   /#00#09#0A#0C#0D#20 "),
            ("Lime Green", 17, 1, 13, " /Lime#20Green   "),
            ("A#42", 7, 4, 3, "    /AB"),
        ];
        for &(input, capacity, offset, expected_written, expected_buffer) in cases.iter() {
            let mut buffer = vec![b' '; capacity];
            let written = PdfName::of(input).copy_bytes_into(&mut buffer, offset);
            assert_eq!(written, expected_written, "input: {:?}", input);
            assert_eq!(buffer, typed_array_for(expected_buffer), "input: {:?}", input);
        }
    }
}