1use crate::crypto::DecryptionTarget;
4use crate::filter::ascii_hex::decode_hex_string;
5use crate::object::macros::object;
6use crate::object::{Object, ObjectLike};
7use crate::reader::{Readable, Reader, ReaderContext, Skippable};
8use crate::trivia::is_white_space_character;
9use log::warn;
10use std::borrow::Cow;
11use std::hash::{Hash, Hasher};
12#[derive(Clone, Debug)]
16struct HexString<'a>(&'a [u8], bool, ReaderContext<'a>);
17
18impl HexString<'_> {
19 fn get(&self) -> Vec<u8> {
21 let decoded = if self.1 {
22 let mut cleaned = Vec::with_capacity(self.0.len() + 1);
23
24 for b in self.0.iter().copied() {
25 if !is_white_space_character(b) {
26 cleaned.push(b);
27 }
28 }
29
30 if cleaned.len() % 2 != 0 {
31 cleaned.push(b'0');
32 }
33
34 decode_hex_string(&cleaned).unwrap()
36 } else {
37 decode_hex_string(self.0).unwrap()
39 };
40
41 if self.2.xref.needs_decryption(&self.2) {
42 self.2
43 .xref
44 .decrypt(
45 self.2.obj_number.unwrap(),
46 &decoded,
47 DecryptionTarget::String,
48 )
49 .unwrap_or_default()
50 } else {
51 decoded
52 }
53 }
54}
55
56impl PartialEq for HexString<'_> {
57 fn eq(&self, other: &Self) -> bool {
58 self.0 == other.0 && self.1 == other.1
60 }
61}
62
63impl Skippable for HexString<'_> {
64 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
65 parse_hex(r).map(|_| {})
66 }
67}
68
69impl<'a> Readable<'a> for HexString<'a> {
70 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
71 let start = r.offset();
72 let mut dirty = parse_hex(r)?;
73 let end = r.offset();
74
75 let result = r.range(start + 1..end - 1).unwrap();
77 dirty |= !result.len().is_multiple_of(2);
78
79 Some(HexString(result, dirty, ctx.clone()))
80 }
81}
82
83impl<'a> TryFrom<Object<'a>> for HexString<'a> {
84 type Error = ();
85
86 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
87 match value {
88 Object::String(String(InnerString::Hex(h))) => Ok(h),
89 _ => Err(()),
90 }
91 }
92}
93
94impl<'a> ObjectLike<'a> for HexString<'a> {}
95
96fn parse_hex(r: &mut Reader<'_>) -> Option<bool> {
97 let mut has_whitespace = false;
98
99 r.forward_tag(b"<")?;
100 while let Some(b) = r.peek_byte() {
101 let is_hex = b.is_ascii_hexdigit();
102 let is_whitespace = is_white_space_character(b);
103 has_whitespace |= is_whitespace;
104
105 if !is_hex && !is_whitespace {
106 break;
107 }
108
109 r.read_byte()?;
110 }
111 r.forward_tag(b">")?;
112
113 Some(has_whitespace)
114}
115
116#[derive(Debug, Clone)]
118struct LiteralString<'a>(&'a [u8], bool, ReaderContext<'a>);
119
120impl<'a> LiteralString<'a> {
121 fn get(&self) -> Cow<'a, [u8]> {
123 let decoded = if self.1 {
124 let mut cleaned = vec![];
125 let mut r = Reader::new(self.0);
126
127 while let Some(byte) = r.read_byte() {
128 match byte {
129 b'\\' => {
130 let next = r.read_byte().unwrap();
131
132 if is_octal_digit(next) {
133 let second = r.read_byte();
134 let third = r.read_byte();
135
136 let bytes = match (second, third) {
137 (Some(n1), Some(n2)) => {
138 match (is_octal_digit(n1), is_octal_digit(n2)) {
139 (true, true) => [next, n1, n2],
140 (true, _) => {
141 r.jump(r.offset() - 1);
142 [b'0', next, n1]
143 }
144 _ => {
145 r.jump(r.offset() - 2);
146 [b'0', b'0', next]
147 }
148 }
149 }
150 (Some(n1), None) => {
151 if is_octal_digit(n1) {
152 [b'0', next, n1]
153 } else {
154 r.jump(r.offset() - 1);
155 [b'0', b'0', next]
156 }
157 }
158 _ => [b'0', b'0', next],
159 };
160
161 let str = std::str::from_utf8(&bytes).unwrap();
162
163 if let Ok(num) = u8::from_str_radix(str, 8) {
164 cleaned.push(num);
165 } else {
166 warn!("overflow occurred while parsing octal literal string");
167 }
168 } else {
169 match next {
170 b'n' => cleaned.push(0xA),
171 b'r' => cleaned.push(0xD),
172 b't' => cleaned.push(0x9),
173 b'b' => cleaned.push(0x8),
174 b'f' => cleaned.push(0xC),
175 b'(' => cleaned.push(b'('),
176 b')' => cleaned.push(b')'),
177 b'\\' => cleaned.push(b'\\'),
178 b'\n' | b'\r' => {
179 r.skip_eol_characters();
185 }
186 _ => cleaned.push(next),
187 }
188 }
189 }
190 b'\n' | b'\r' => {
195 cleaned.push(b'\n');
196 r.skip_eol_characters();
197 }
198 other => cleaned.push(other),
199 }
200 }
201
202 Cow::Owned(cleaned)
203 } else {
204 Cow::Borrowed(self.0)
205 };
206
207 if self.2.xref.needs_decryption(&self.2) {
208 Cow::Owned(
209 self.2
210 .xref
211 .decrypt(
212 self.2.obj_number.unwrap(),
213 &decoded,
214 DecryptionTarget::String,
215 )
216 .unwrap_or_default(),
217 )
218 } else {
219 decoded
220 }
221 }
222}
223
224impl Hash for LiteralString<'_> {
225 fn hash<H: Hasher>(&self, state: &mut H) {
226 self.0.hash(state);
227 self.1.hash(state);
228 }
229}
230
231impl PartialEq for LiteralString<'_> {
232 fn eq(&self, other: &Self) -> bool {
233 self.0.eq(other.0) && self.1.eq(&other.1)
234 }
235}
236
237impl Skippable for LiteralString<'_> {
238 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
239 parse_literal(r).map(|_| ())
240 }
241}
242
243impl<'a> Readable<'a> for LiteralString<'a> {
244 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
245 let start = r.offset();
246 let dirty = parse_literal(r)?;
247 let end = r.offset();
248
249 let result = r.range(start + 1..end - 1).unwrap();
251
252 Some(LiteralString(result, dirty, ctx.clone()))
253 }
254}
255
256impl<'a> TryFrom<Object<'a>> for LiteralString<'a> {
257 type Error = ();
258
259 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
260 match value {
261 Object::String(String(InnerString::Literal(l))) => Ok(l),
262 _ => Err(()),
263 }
264 }
265}
266
267impl<'a> ObjectLike<'a> for LiteralString<'a> {}
268
269fn parse_literal(r: &mut Reader<'_>) -> Option<bool> {
270 r.forward_tag(b"(")?;
271 let mut bracket_counter = 1;
272 let mut dirty = false;
273
274 while bracket_counter > 0 {
275 let byte = r.read_byte()?;
276
277 match byte {
278 b'\\' => {
279 dirty = true;
280
281 let _ = r.read_byte()?;
282 }
283 b'(' => bracket_counter += 1,
284 b')' => bracket_counter -= 1,
285 b'\n' | b'\r' => dirty = true,
286 _ => {}
287 };
288 }
289
290 Some(dirty)
291}
292
293#[derive(Clone, Debug, PartialEq)]
294enum InnerString<'a> {
295 Hex(HexString<'a>),
296 Literal(LiteralString<'a>),
297}
298
299#[derive(Clone, Debug, PartialEq)]
301pub struct String<'a>(InnerString<'a>);
302
303impl<'a> String<'a> {
304 pub fn get(&self) -> Cow<'a, [u8]> {
306 match &self.0 {
307 InnerString::Hex(hex) => Cow::Owned(hex.get()),
308 InnerString::Literal(lit) => lit.get(),
309 }
310 }
311}
312
313impl<'a> From<HexString<'a>> for String<'a> {
314 fn from(value: HexString<'a>) -> Self {
315 Self(InnerString::Hex(value))
316 }
317}
318
319impl<'a> From<LiteralString<'a>> for String<'a> {
320 fn from(value: LiteralString<'a>) -> Self {
321 Self(InnerString::Literal(value))
322 }
323}
324
325object!(String<'a>, String);
326
327impl Skippable for String<'_> {
328 fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
329 match r.peek_byte()? {
330 b'<' => HexString::skip(r, is_content_stream),
331 b'(' => LiteralString::skip(r, is_content_stream),
332 _ => None,
333 }
334 }
335}
336
337impl<'a> Readable<'a> for String<'a> {
338 fn read(r: &mut Reader<'a>, _: &ReaderContext) -> Option<Self> {
339 let inner = match r.peek_byte()? {
340 b'<' => InnerString::Hex(r.read_without_context::<HexString>()?),
341 b'(' => InnerString::Literal(r.read_without_context::<LiteralString>()?),
342 _ => return None,
343 };
344
345 Some(String(inner))
346 }
347}
348
/// Returns `true` if `byte` is one of the ASCII digits `0` through `7`.
fn is_octal_digit(byte: u8) -> bool {
    (b'0'..=b'7').contains(&byte)
}
352
353#[cfg(test)]
354mod tests {
355 use crate::object::string::{HexString, LiteralString, String};
356 use crate::reader::Reader;
357
358 #[test]
359 fn hex_string_empty() {
360 assert_eq!(
361 Reader::new("<>".as_bytes())
362 .read_without_context::<HexString>()
363 .unwrap()
364 .get(),
365 vec![]
366 );
367 }
368
369 #[test]
370 fn hex_string_1() {
371 assert_eq!(
372 Reader::new("<00010203>".as_bytes())
373 .read_without_context::<HexString>()
374 .unwrap()
375 .get(),
376 vec![0x00, 0x01, 0x02, 0x03]
377 );
378 }
379
380 #[test]
381 fn hex_string_2() {
382 assert_eq!(
383 Reader::new("<000102034>".as_bytes())
384 .read_without_context::<HexString>()
385 .unwrap()
386 .get(),
387 vec![0x00, 0x01, 0x02, 0x03, 0x40]
388 );
389 }
390
391 #[test]
392 fn hex_string_trailing_1() {
393 assert_eq!(
394 Reader::new("<000102034>dfgfg4".as_bytes())
395 .read_without_context::<HexString>()
396 .unwrap()
397 .get(),
398 vec![0x00, 0x01, 0x02, 0x03, 0x40]
399 );
400 }
401
402 #[test]
403 fn hex_string_trailing_2() {
404 assert_eq!(
405 Reader::new("<1 3 4>dfgfg4".as_bytes())
406 .read_without_context::<HexString>()
407 .unwrap()
408 .get(),
409 vec![0x13, 0x40]
410 );
411 }
412
413 #[test]
414 fn hex_string_trailing_3() {
415 assert_eq!(
416 Reader::new("<1>dfgfg4".as_bytes())
417 .read_without_context::<HexString>()
418 .unwrap()
419 .get(),
420 vec![0x10]
421 );
422 }
423
424 #[test]
425 fn hex_string_invalid_1() {
426 assert!(
427 Reader::new("<".as_bytes())
428 .read_without_context::<HexString>()
429 .is_none()
430 );
431 }
432
433 #[test]
434 fn hex_string_invalid_2() {
435 assert!(
436 Reader::new("34AD".as_bytes())
437 .read_without_context::<HexString>()
438 .is_none()
439 );
440 }
441
442 #[test]
443 fn literal_string_empty() {
444 assert_eq!(
445 Reader::new("()".as_bytes())
446 .read_without_context::<LiteralString>()
447 .unwrap()
448 .get()
449 .to_vec(),
450 b"".to_vec()
451 );
452 }
453
454 #[test]
455 fn literal_string_1() {
456 assert_eq!(
457 Reader::new("(Hi there.)".as_bytes())
458 .read_without_context::<LiteralString>()
459 .unwrap()
460 .get()
461 .to_vec(),
462 b"Hi there.".to_vec()
463 );
464 }
465
466 #[test]
467 fn literal_string_2() {
468 assert!(
469 Reader::new("(Hi \\777)".as_bytes())
470 .read_without_context::<LiteralString>()
471 .is_some()
472 );
473 }
474
475 #[test]
476 fn literal_string_3() {
477 assert_eq!(
478 Reader::new("(Hi ) there.)".as_bytes())
479 .read_without_context::<LiteralString>()
480 .unwrap()
481 .get()
482 .to_vec(),
483 b"Hi ".to_vec()
484 );
485 }
486
487 #[test]
488 fn literal_string_4() {
489 assert_eq!(
490 Reader::new("(Hi (()) there)".as_bytes())
491 .read_without_context::<LiteralString>()
492 .unwrap()
493 .get()
494 .to_vec(),
495 b"Hi (()) there".to_vec()
496 );
497 }
498
499 #[test]
500 fn literal_string_5() {
501 assert_eq!(
502 Reader::new("(Hi \\()".as_bytes())
503 .read_without_context::<LiteralString>()
504 .unwrap()
505 .get()
506 .to_vec(),
507 b"Hi (".to_vec()
508 );
509 }
510
511 #[test]
512 fn literal_string_6() {
513 assert_eq!(
514 Reader::new("(Hi \\\nthere)".as_bytes())
515 .read_without_context::<LiteralString>()
516 .unwrap()
517 .get()
518 .to_vec(),
519 b"Hi there".to_vec()
520 );
521 }
522
523 #[test]
524 fn literal_string_7() {
525 assert_eq!(
526 Reader::new("(Hi \\05354)".as_bytes())
527 .read_without_context::<LiteralString>()
528 .unwrap()
529 .get()
530 .to_vec(),
531 b"Hi +54".to_vec()
532 );
533 }
534
535 #[test]
536 fn literal_string_8() {
537 assert_eq!(
538 Reader::new("(\\3)".as_bytes())
539 .read_without_context::<String>()
540 .unwrap()
541 .get(),
542 b"\x03".to_vec()
543 )
544 }
545
546 #[test]
547 fn literal_string_9() {
548 assert_eq!(
549 Reader::new("(\\36)".as_bytes())
550 .read_without_context::<String>()
551 .unwrap()
552 .get(),
553 b"\x1e".to_vec()
554 )
555 }
556
557 #[test]
558 fn literal_string_10() {
559 assert_eq!(
560 Reader::new("(\\36ab)".as_bytes())
561 .read_without_context::<String>()
562 .unwrap()
563 .get(),
564 b"\x1eab".to_vec()
565 )
566 }
567
568 #[test]
569 fn literal_string_11() {
570 assert_eq!(
571 Reader::new("(\\00Y)".as_bytes())
572 .read_without_context::<String>()
573 .unwrap()
574 .get(),
575 b"\0Y".to_vec()
576 )
577 }
578
579 #[test]
580 fn literal_string_12() {
581 assert_eq!(
582 Reader::new("(\\0Y)".as_bytes())
583 .read_without_context::<String>()
584 .unwrap()
585 .get(),
586 b"\0Y".to_vec()
587 )
588 }
589
590 #[test]
591 fn literal_string_trailing() {
592 assert_eq!(
593 Reader::new("(Hi there.)abcde".as_bytes())
594 .read_without_context::<LiteralString>()
595 .unwrap()
596 .get()
597 .to_vec(),
598 b"Hi there.".to_vec()
599 );
600 }
601
602 #[test]
603 fn literal_string_invalid() {
604 assert_eq!(
605 Reader::new("(Hi \\778)".as_bytes())
606 .read_without_context::<LiteralString>()
607 .unwrap()
608 .get()
609 .to_vec(),
610 b"Hi \x3F8".to_vec()
611 );
612 }
613
614 #[test]
615 fn string_1() {
616 assert_eq!(
617 Reader::new("(Hi there.)".as_bytes())
618 .read_without_context::<String>()
619 .unwrap()
620 .get()
621 .to_vec(),
622 b"Hi there.".to_vec()
623 );
624 }
625
626 #[test]
627 fn string_2() {
628 assert_eq!(
629 Reader::new("<00010203>".as_bytes())
630 .read_without_context::<String>()
631 .unwrap()
632 .get(),
633 vec![0x00, 0x01, 0x02, 0x03]
634 );
635 }
636}