1use crate::crypto::DecryptionTarget;
4use crate::filter::ascii_hex;
5use crate::object::Object;
6use crate::object::macros::object;
7use crate::reader::Reader;
8use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
9use core::ops::Deref;
10use log::warn;
11use smallvec::SmallVec;
12
13type StringInner = SmallVec<[u8; 23]>;
14
15#[derive(Clone, Debug, PartialEq, Eq, Hash)]
17pub struct String(StringInner);
18
19impl String {
20 pub fn as_bytes(&self) -> &[u8] {
22 &self.0
23 }
24}
25
26impl Deref for String {
27 type Target = [u8];
28
29 fn deref(&self) -> &Self::Target {
30 &self.0
31 }
32}
33
34impl AsRef<[u8]> for String {
35 fn as_ref(&self) -> &[u8] {
36 &self.0
37 }
38}
39
// Invokes the crate's `object!` macro for `String`.
// NOTE(review): the macro lives in `crate::object::macros`; presumably it wires `String`
// into the crate's `Object` machinery — confirm against the macro definition.
object!(String, String);
41
42impl Skippable for String {
43 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
44 match r.peek_byte()? {
45 b'<' => skip_hex(r),
46 b'(' => skip_literal(r),
47 _ => None,
48 }
49 }
50}
51
52impl Readable<'_> for String {
53 fn read(r: &mut Reader<'_>, ctx: &ReaderContext<'_>) -> Option<Self> {
54 let decoded = match r.peek_byte()? {
55 b'<' => read_hex(r)?,
56 b'(' => read_literal(r)?,
57 _ => return None,
58 };
59
60 let final_data = if ctx.xref().needs_decryption(ctx) {
62 if let Some(obj_number) = ctx.obj_number() {
63 ctx.xref()
64 .decrypt(obj_number, &decoded, DecryptionTarget::String)
65 .map(SmallVec::from_vec)
66 .unwrap_or(decoded)
67 } else {
68 decoded
69 }
70 } else {
71 decoded
72 };
73
74 Some(Self(final_data))
75 }
76}
77
78fn skip_hex(r: &mut Reader<'_>) -> Option<()> {
79 r.forward_tag(b"<")?;
80 loop {
84 match r.peek_byte()? {
85 b'>' => break,
86 _ => {
87 r.read_byte()?;
88 }
89 }
90 }
91 r.forward_tag(b">")?;
92
93 Some(())
94}
95
96fn read_hex(r: &mut Reader<'_>) -> Option<StringInner> {
97 let start = r.offset();
98 skip_hex(r)?;
99 let end = r.offset();
100
101 let raw = r.range(start + 1..end - 1)?;
103 let decoded = ascii_hex::decode(raw).unwrap_or_default();
106
107 Some(SmallVec::from_vec(decoded))
108}
109
110fn skip_literal(r: &mut Reader<'_>) -> Option<()> {
111 r.forward_tag(b"(")?;
112 let mut bracket_counter = 1;
113
114 while bracket_counter > 0 {
115 let byte = r.read_byte()?;
116
117 match byte {
118 b'\\' => {
119 let _ = r.read_byte()?;
120 }
121 b'(' => bracket_counter += 1,
122 b')' => bracket_counter -= 1,
123 _ => {}
124 };
125 }
126
127 Some(())
128}
129
130fn read_literal(r: &mut Reader<'_>) -> Option<StringInner> {
131 let start = r.offset();
132 skip_literal(r)?;
133 let end = r.offset();
134
135 let data = r.range(start + 1..end - 1)?;
137
138 let mut r = Reader::new(data);
139 let mut result = SmallVec::new();
140
141 while let Some(byte) = r.read_byte() {
142 match byte {
143 b'\\' => {
144 let next = r.read_byte()?;
145
146 if is_octal_digit(next) {
147 let second = r.read_byte();
148 let third = r.read_byte();
149
150 let bytes = match (second, third) {
151 (Some(n1), Some(n2)) => match (is_octal_digit(n1), is_octal_digit(n2)) {
152 (true, true) => [next, n1, n2],
153 (true, _) => {
154 r.jump(r.offset() - 1);
155 [b'0', next, n1]
156 }
157 _ => {
158 r.jump(r.offset() - 2);
159 [b'0', b'0', next]
160 }
161 },
162 (Some(n1), None) => {
163 if is_octal_digit(n1) {
164 [b'0', next, n1]
165 } else {
166 r.jump(r.offset() - 1);
167 [b'0', b'0', next]
168 }
169 }
170 _ => [b'0', b'0', next],
171 };
172
173 let value = bytes
175 .iter()
176 .fold(0u16, |acc, &b| acc * 8 + (b - b'0') as u16);
177 if value <= 255 {
178 result.push(value as u8);
179 } else {
180 warn!("overflow occurred while parsing octal literal string");
181 }
182 } else {
183 match next {
184 b'n' => result.push(0xA),
185 b'r' => result.push(0xD),
186 b't' => result.push(0x9),
187 b'b' => result.push(0x8),
188 b'f' => result.push(0xC),
189 b'(' => result.push(b'('),
190 b')' => result.push(b')'),
191 b'\\' => result.push(b'\\'),
192 b'\n' | b'\r' => {
193 r.skip_eol_characters();
199 }
200 _ => result.push(next),
201 }
202 }
203 }
204 b'(' | b')' => result.push(byte),
205 b'\n' | b'\r' => {
210 result.push(b'\n');
211 r.skip_eol_characters();
212 }
213 other => result.push(other),
214 }
215 }
216
217 Some(result)
218}
219
/// Returns `true` if `byte` is one of the ASCII digits `0` through `7`.
fn is_octal_digit(byte: u8) -> bool {
    (b'0'..=b'7').contains(&byte)
}
223
#[cfg(test)]
mod tests {
    use crate::object::String;
    use crate::reader::Reader;
    use crate::reader::ReaderExt;

    /// Reads one string from the start of `input` without a reader context.
    fn parse(input: &[u8]) -> Option<String> {
        Reader::new(input).read_without_context::<String>()
    }

    /// Asserts that `input` parses successfully and decodes to exactly `expected`.
    #[track_caller]
    fn assert_parses_to(input: &[u8], expected: &[u8]) {
        let parsed = parse(input).unwrap();
        assert_eq!(parsed.as_bytes(), expected);
    }

    #[test]
    fn hex_string_empty() {
        assert_parses_to(b"<>", b"");
    }

    #[test]
    fn hex_string_1() {
        assert_parses_to(b"<00010203>", &[0x00, 0x01, 0x02, 0x03]);
    }

    #[test]
    fn hex_string_2() {
        assert_parses_to(b"<000102034>", &[0x00, 0x01, 0x02, 0x03, 0x40]);
    }

    #[test]
    fn hex_string_trailing_1() {
        assert_parses_to(b"<000102034>dfgfg4", &[0x00, 0x01, 0x02, 0x03, 0x40]);
    }

    #[test]
    fn hex_string_trailing_2() {
        assert_parses_to(b"<1 3 4>dfgfg4", &[0x13, 0x40]);
    }

    #[test]
    fn hex_string_trailing_3() {
        assert_parses_to(b"<1>dfgfg4", &[0x10]);
    }

    #[test]
    fn hex_string_invalid_1() {
        assert!(parse(b"<").is_none());
    }

    #[test]
    fn hex_string_invalid_2() {
        assert!(parse(b"34AD").is_none());
    }

    #[test]
    fn literal_string_empty() {
        assert_parses_to(b"()", b"");
    }

    #[test]
    fn literal_string_1() {
        assert_parses_to(b"(Hi there.)", b"Hi there.");
    }

    #[test]
    fn literal_string_2() {
        assert!(parse(b"(Hi \\777)").is_some());
    }

    #[test]
    fn literal_string_3() {
        assert_parses_to(b"(Hi ) there.)", b"Hi ");
    }

    #[test]
    fn literal_string_4() {
        assert_parses_to(b"(Hi (()) there)", b"Hi (()) there");
    }

    #[test]
    fn literal_string_5() {
        assert_parses_to(b"(Hi \\()", b"Hi (");
    }

    #[test]
    fn literal_string_6() {
        assert_parses_to(b"(Hi \\\nthere)", b"Hi there");
    }

    #[test]
    fn literal_string_7() {
        assert_parses_to(b"(Hi \\05354)", b"Hi +54");
    }

    #[test]
    fn literal_string_8() {
        assert_parses_to(b"(\\3)", b"\x03");
    }

    #[test]
    fn literal_string_9() {
        assert_parses_to(b"(\\36)", b"\x1e");
    }

    #[test]
    fn literal_string_10() {
        assert_parses_to(b"(\\36ab)", b"\x1eab");
    }

    #[test]
    fn literal_string_11() {
        assert_parses_to(b"(\\00Y)", b"\0Y");
    }

    #[test]
    fn literal_string_12() {
        assert_parses_to(b"(\\0Y)", b"\0Y");
    }

    #[test]
    fn literal_string_trailing() {
        assert_parses_to(b"(Hi there.)abcde", b"Hi there.");
    }

    #[test]
    fn literal_string_invalid() {
        assert_parses_to(b"(Hi \\778)", b"Hi \x3F8");
    }

    #[test]
    fn string_1() {
        assert_parses_to(b"(Hi there.)", b"Hi there.");
    }

    #[test]
    fn string_2() {
        assert_parses_to(b"<00010203>", &[0x00, 0x01, 0x02, 0x03]);
    }
}