pdf_lib_rs/core/objects/
pdf_string.rs1use std::fmt;
2use crate::core::syntax::CharCodes;
3use crate::utils::{
4 copy_string_into_buffer, has_utf16_bom, pdf_doc_encoding_decode, utf16_decode,
5};
6use super::pdf_object::PdfObjectTrait;
7
/// A PDF literal string object.
///
/// `value` holds the text that sits between the surrounding parentheses. It is
/// stored exactly as supplied to the constructor: escape sequences are kept in
/// their written form and are only interpreted on demand by the decoding
/// methods.
///
/// Equality is a plain comparison of the stored text, so the type is `Eq` and
/// `Hash` as well as `PartialEq`, which lets it be used as a set or map key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PdfString {
    /// Raw, still-escaped contents of the literal (without the outer parentheses).
    value: String,
}
13
14impl PdfString {
15 pub fn of(value: &str) -> Self {
16 PdfString {
17 value: value.to_string(),
18 }
19 }
20
21 pub fn as_string(&self) -> &str {
23 &self.value
24 }
25
26 pub fn as_bytes_decoded(&self) -> Vec<u8> {
29 let mut bytes = Vec::new();
30 let chars: Vec<u8> = self.value.chars().map(|c| c as u8).collect();
31 let mut i = 0;
32 let mut escaped = false;
33 let mut octal = String::new();
34
35 while i < chars.len() {
36 let byte = chars[i];
37 let next_byte = chars.get(i + 1).copied();
38
39 if !escaped {
40 if byte == CharCodes::BackSlash {
41 escaped = true;
42 } else {
43 bytes.push(byte);
44 }
45 } else {
46 match byte {
47 CharCodes::Newline | CharCodes::CarriageReturn => {
48 escaped = false;
50 }
51 b'n' => {
52 bytes.push(CharCodes::Newline);
53 escaped = false;
54 }
55 b'r' => {
56 bytes.push(CharCodes::CarriageReturn);
57 escaped = false;
58 }
59 b't' => {
60 bytes.push(CharCodes::Tab);
61 escaped = false;
62 }
63 b'b' => {
64 bytes.push(CharCodes::Backspace);
65 escaped = false;
66 }
67 b'f' => {
68 bytes.push(CharCodes::FormFeed);
69 escaped = false;
70 }
71 CharCodes::LeftParen => {
72 bytes.push(CharCodes::LeftParen);
73 escaped = false;
74 }
75 CharCodes::RightParen => {
76 bytes.push(CharCodes::RightParen);
77 escaped = false;
78 }
79 CharCodes::BackSlash => {
80 bytes.push(CharCodes::BackSlash);
81 escaped = false;
82 }
83 b'0'..=b'7' => {
84 octal.push(byte as char);
85 if octal.len() == 3
86 || !matches!(next_byte, Some(b'0'..=b'7'))
87 {
88 if let Ok(val) = u8::from_str_radix(&octal, 8) {
89 bytes.push(val);
90 }
91 octal.clear();
92 escaped = false;
93 }
94 }
95 _ => {
96 bytes.push(byte);
97 escaped = false;
98 }
99 }
100 }
101 i += 1;
102 }
103
104 bytes
105 }
106
107 pub fn decode_text(&self) -> String {
109 let bytes = self.as_bytes_decoded();
110 if has_utf16_bom(&bytes) {
111 utf16_decode(&bytes)
112 } else {
113 pdf_doc_encoding_decode(&bytes)
114 }
115 }
116}
117
118impl fmt::Display for PdfString {
119 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
120 write!(f, "({})", self.value)
121 }
122}
123
124impl PdfObjectTrait for PdfString {
125 fn size_in_bytes(&self) -> usize {
126 self.value.len() + 2
127 }
128
129 fn copy_bytes_into(&self, buffer: &mut [u8], offset: usize) -> usize {
130 let mut off = offset;
131 buffer[off] = CharCodes::LeftParen;
132 off += 1;
133 off += copy_string_into_buffer(&self.value, buffer, off);
134 buffer[off] = CharCodes::RightParen;
135 self.value.len() + 2
136 }
137}
138
#[cfg(test)]
mod tests {
    //! Unit tests for `PdfString`: construction, raw access, display,
    //! escape decoding, size reporting, serialization and text decoding.

    use super::*;
    use crate::utils::typed_array_for;

    /// Construction accepts plain text, nested parentheses and backslashes.
    #[test]
    fn can_be_constructed() {
        let _ = PdfString::of("foobar");
        let _ = PdfString::of(" (foo(bar))");
        let _ = PdfString::of(")b\\a/z(");
    }

    /// `as_string` hands back the text that was stored.
    #[test]
    fn can_be_converted_to_raw_string() {
        assert_eq!(PdfString::of("foobar").as_string(), "foobar");
    }

    /// A clone renders identically to its source.
    #[test]
    fn can_be_cloned() {
        let original = PdfString::of(")b\\a/z(");
        let clone = original.clone();
        assert_eq!(clone.to_string(), original.to_string());
    }

    /// `Display` wraps the contents in parentheses.
    #[test]
    fn can_be_converted_to_string() {
        assert_eq!(PdfString::of("foobar").to_string(), "(foobar)");
    }

    /// Backslashes in the contents are emitted unchanged.
    #[test]
    fn does_not_escape_backslashes() {
        assert_eq!(
            PdfString::of("Foo\\Bar\\Qux").to_string(),
            "(Foo\\Bar\\Qux)"
        );
    }

    /// Parentheses in the contents are emitted unchanged.
    #[test]
    fn does_not_escape_nested_parenthesis() {
        assert_eq!(
            PdfString::of("(Foo((Bar))Qux)").to_string(),
            "((Foo((Bar))Qux))"
        );
    }

    /// Three-digit octal escapes decode to the byte they name.
    #[test]
    fn can_interpret_escaped_octal_codes() {
        let literal =
            "\\376\\377\\000\\105\\000\\147\\000\\147\\000\\040\\330\\074\\337\\163";
        let bytes = PdfString::of(literal).as_bytes_decoded();
        assert_eq!(
            bytes,
            vec![
                0o376, 0o377, 0o000, 0o105, 0o000, 0o147, 0o000, 0o147, 0o000, 0o040,
                0o330, 0o074, 0o337, 0o163,
            ]
        );
    }

    /// Bare EOLs are kept; a backslash followed by an EOL is dropped entirely.
    #[test]
    fn can_interpret_eols_and_line_breaks() {
        let literal = "a\nb\rc\\\nd\\\re";
        let bytes = PdfString::of(literal).as_bytes_decoded();
        assert_eq!(
            bytes,
            vec![b'a', b'\n', b'b', b'\r', b'c', b'd', b'e']
        );
    }

    /// An unknown escape keeps the character and drops the backslash.
    #[test]
    fn can_interpret_invalid_escapes() {
        let literal = "a\nb\rc\\xd\\;";
        let bytes = PdfString::of(literal).as_bytes_decoded();
        assert_eq!(
            bytes,
            vec![b'a', b'\n', b'b', b'\r', b'c', b'x', b'd', b';']
        );
    }

    /// The reported size is the content length plus the two parentheses.
    #[test]
    fn can_provide_size_in_bytes() {
        assert_eq!(PdfString::of("foobar").size_in_bytes(), 8);
        assert_eq!(PdfString::of(" (foo(bar))").size_in_bytes(), 13);
        assert_eq!(PdfString::of(")b\\a/z(").size_in_bytes(), 9);
    }

    /// Serialization writes 12 bytes at offset 3 of a 20-byte buffer, so the
    /// expected buffer keeps 3 leading and 5 trailing spaces.
    #[test]
    fn can_be_serialized() {
        let mut buffer = vec![b' '; 20];
        assert_eq!(
            PdfString::of(")(b\\a/))z(").copy_bytes_into(&mut buffer, 3),
            12
        );
        assert_eq!(buffer, typed_array_for("   ()(b\\a/))z()     "));
    }

    /// A literal starting with the UTF-16BE byte-order mark decodes as UTF-16.
    #[test]
    fn can_decode_utf16be_strings() {
        let literal =
            "\\376\\377\\000\\105\\000\\147\\000\\147\\000\\040\\330\\074\\337\\163";
        let text = PdfString::of(literal).decode_text();
        assert_eq!(text, "Egg 🍳");
    }

    /// A literal without a byte-order mark decodes through PDFDocEncoding.
    #[test]
    fn can_decode_pdfdocencoded_strings() {
        let literal = "a\\105b\\163\\0b6";
        let text = PdfString::of(literal).decode_text();
        assert_eq!(text, "aEbs\0b6");
    }
}