Skip to main content

pdf_lib_rs/core/objects/
pdf_hex_string.rs

1use std::fmt;
2use crate::core::syntax::CharCodes;
3use crate::utils::{
4    copy_string_into_buffer, has_utf16_bom, pdf_doc_encoding_decode, to_hex_string_of_min_length,
5    utf16_decode, utf16_encode,
6};
7use super::pdf_object::PdfObjectTrait;
8
/// A PDF hexadecimal string object, e.g., `<4E6F76>`.
///
/// Only the hexadecimal digits are stored; the enclosing `<` and `>` delimiters are added
/// when the object is displayed or serialized. Equality and hashing compare the stored
/// digits verbatim, so `"ab"` and `"AB"` are distinct values.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PdfHexString {
    /// Hexadecimal digits exactly as supplied by the caller, without delimiters.
    value: String,
}
14
15impl PdfHexString {
16    pub fn of(value: &str) -> Self {
17        PdfHexString {
18            value: value.to_string(),
19        }
20    }
21
22    /// Create from text, encoding as UTF-16BE with BOM.
23    pub fn from_text(text: &str) -> Self {
24        let encoded = utf16_encode(text);
25        let mut hex = String::new();
26        for unit in &encoded {
27            hex.push_str(&to_hex_string_of_min_length(*unit, 4));
28        }
29        PdfHexString { value: hex }
30    }
31
32    /// Get the raw hex string value.
33    pub fn as_string(&self) -> &str {
34        &self.value
35    }
36
37    /// Convert hex string to raw bytes.
38    pub fn as_bytes_decoded(&self) -> Vec<u8> {
39        // Append a zero if odd number of digits (PDF spec 7.3.4.3)
40        let hex = if self.value.len() % 2 == 1 {
41            format!("{}0", self.value)
42        } else {
43            self.value.clone()
44        };
45
46        let mut bytes = Vec::with_capacity(hex.len() / 2);
47        let mut i = 0;
48        while i + 1 < hex.len() {
49            if let Ok(byte) = u8::from_str_radix(&hex[i..i + 2], 16) {
50                bytes.push(byte);
51            } else {
52                bytes.push(0);
53            }
54            i += 2;
55        }
56        bytes
57    }
58
59    /// Decode hex string to text, handling UTF-16 and PDFDocEncoding.
60    pub fn decode_text(&self) -> String {
61        let bytes = self.as_bytes_decoded();
62        if has_utf16_bom(&bytes) {
63            utf16_decode(&bytes)
64        } else {
65            pdf_doc_encoding_decode(&bytes)
66        }
67    }
68}
69
70impl fmt::Display for PdfHexString {
71    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
72        write!(f, "<{}>", self.value)
73    }
74}
75
76impl PdfObjectTrait for PdfHexString {
77    fn size_in_bytes(&self) -> usize {
78        self.value.len() + 2
79    }
80
81    fn copy_bytes_into(&self, buffer: &mut [u8], offset: usize) -> usize {
82        let mut off = offset;
83        buffer[off] = CharCodes::LessThan;
84        off += 1;
85        off += copy_string_into_buffer(&self.value, buffer, off);
86        buffer[off] = CharCodes::GreaterThan;
87        self.value.len() + 2
88    }
89}
90
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::typed_array_for;

    /// Hex digits shared by the even-length decoding test and the UTF-16BE text test.
    const EGG_UTF16_BE: &str = "FEFF0045006700670020D83CDF73";

    /// Construction accepts both even and odd numbers of digits.
    #[test]
    fn can_be_constructed() {
        for raw in ["4E6F762073686D6F7A2", "901FA3", "901FA"] {
            let _ = PdfHexString::of(raw);
        }
    }

    /// Empty text still yields the byte-order mark.
    #[test]
    fn can_be_constructed_from_text() {
        let rendered = PdfHexString::from_text("").to_string();
        assert_eq!(rendered, "<FEFF>");
    }

    /// Every pair of digits becomes one byte.
    #[test]
    fn can_handle_even_hex_digits() {
        let decoded = PdfHexString::of(EGG_UTF16_BE).as_bytes_decoded();
        assert_eq!(
            decoded,
            vec![0xFE, 0xFF, 0x00, 0x45, 0x00, 0x67, 0x00, 0x67, 0x00, 0x20, 0xD8, 0x3C, 0xDF, 0x73]
        );
    }

    /// A trailing unpaired digit is padded with `0`.
    #[test]
    fn can_handle_odd_hex_digits() {
        let decoded = PdfHexString::of("6145627300623").as_bytes_decoded();
        assert_eq!(decoded, vec![0x61, 0x45, 0x62, 0x73, 0x00, 0x62, 0x30]);
    }

    #[test]
    fn can_decode_utf16be_string() {
        let text = PdfHexString::of(EGG_UTF16_BE).decode_text();
        assert_eq!(text, "Egg 🍳");
    }

    #[test]
    fn can_decode_utf16le_string() {
        let text = PdfHexString::of("FFFE45006700670020003CD873DF").decode_text();
        assert_eq!(text, "Egg 🍳");
    }

    #[test]
    fn can_decode_pdfdocencoded_string() {
        let text = PdfHexString::of("61456273006236").decode_text();
        assert_eq!(text, "aEbs\0b6");
    }

    #[test]
    fn can_get_raw_string() {
        let hex_string = PdfHexString::of("901FA");
        assert_eq!(hex_string.as_string(), "901FA");
    }

    #[test]
    fn can_be_cloned() {
        let source = PdfHexString::of("901FA");
        let duplicate = source.clone();
        assert_eq!(duplicate.to_string(), source.to_string());
    }

    /// Display output is the raw digits wrapped in angle brackets.
    #[test]
    fn can_be_converted_to_string() {
        let cases = [
            ("4E6F762073686D6F7A2", "<4E6F762073686D6F7A2>"),
            ("901FA3", "<901FA3>"),
            ("901FA", "<901FA>"),
        ];
        for (raw, expected) in cases {
            assert_eq!(PdfHexString::of(raw).to_string(), expected);
        }
    }

    /// The reported size is the digit count plus the two delimiters.
    #[test]
    fn can_provide_size_in_bytes() {
        let cases = [("4E6F762073686D6F7A2", 21), ("901FA3", 8), ("901FA", 7)];
        for (raw, expected) in cases {
            assert_eq!(PdfHexString::of(raw).size_in_bytes(), expected);
        }
    }

    /// Serialization writes at the requested offset and leaves surrounding bytes untouched.
    #[test]
    fn can_be_serialized() {
        let mut buffer = vec![b' '; 11];
        let written = PdfHexString::of("901FA").copy_bytes_into(&mut buffer, 3);
        assert_eq!(written, 7);
        assert_eq!(buffer, typed_array_for("   <901FA> "));
    }
}