1use crate::crypto::DecryptionTarget;
4use crate::filter::ascii_hex::decode_hex_string;
5use crate::object::macros::object;
6use crate::object::{Object, ObjectLike};
7use crate::reader::Reader;
8use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
9use crate::trivia::is_white_space_character;
10use log::warn;
11use std::borrow::Cow;
12use std::hash::{Hash, Hasher};
/// A hex-encoded string, i.e. one written between `<` and `>`.
///
/// Fields, in order:
/// - the raw bytes between the delimiters (delimiters excluded),
/// - a flag that is set at parse time when the raw bytes contain white space
///   or an odd number of bytes, meaning they must be cleaned before decoding,
/// - a clone of the reader context the string was read with (consulted for
///   decryption when the content is retrieved).
#[derive(Clone, Debug)]
struct HexString<'a>(&'a [u8], bool, ReaderContext<'a>);
18
19impl HexString<'_> {
20 fn get(&self) -> Vec<u8> {
22 let decoded = if self.1 {
23 let mut cleaned = Vec::with_capacity(self.0.len() + 1);
24
25 for b in self.0.iter().copied() {
26 if !is_white_space_character(b) {
27 cleaned.push(b);
28 }
29 }
30
31 if cleaned.len() % 2 != 0 {
32 cleaned.push(b'0');
33 }
34
35 decode_hex_string(&cleaned).unwrap()
37 } else {
38 decode_hex_string(self.0).unwrap()
40 };
41
42 if self.2.xref.needs_decryption(&self.2) {
43 self.2
44 .xref
45 .decrypt(
46 self.2.obj_number.unwrap(),
47 &decoded,
48 DecryptionTarget::String,
49 )
50 .unwrap_or_default()
51 } else {
52 decoded
53 }
54 }
55}
56
57impl PartialEq for HexString<'_> {
58 fn eq(&self, other: &Self) -> bool {
59 self.0 == other.0 && self.1 == other.1
61 }
62}
63
64impl Skippable for HexString<'_> {
65 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
66 parse_hex(r).map(|_| {})
67 }
68}
69
70impl<'a> Readable<'a> for HexString<'a> {
71 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
72 let start = r.offset();
73 let mut dirty = parse_hex(r)?;
74 let end = r.offset();
75
76 let result = r.range(start + 1..end - 1).unwrap();
78 dirty |= !result.len().is_multiple_of(2);
79
80 Some(HexString(result, dirty, ctx.clone()))
81 }
82}
83
84impl<'a> TryFrom<Object<'a>> for HexString<'a> {
85 type Error = ();
86
87 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
88 match value {
89 Object::String(String(InnerString::Hex(h))) => Ok(h),
90 _ => Err(()),
91 }
92 }
93}
94
// Marks `HexString` as `ObjectLike`. The impl body is empty, so nothing is
// overridden here.
impl<'a> ObjectLike<'a> for HexString<'a> {}
96
97fn parse_hex(r: &mut Reader<'_>) -> Option<bool> {
98 let mut has_whitespace = false;
99
100 r.forward_tag(b"<")?;
101 while let Some(b) = r.peek_byte() {
102 let is_hex = b.is_ascii_hexdigit();
103 let is_whitespace = is_white_space_character(b);
104 has_whitespace |= is_whitespace;
105
106 if !is_hex && !is_whitespace {
107 break;
108 }
109
110 r.read_byte()?;
111 }
112 r.forward_tag(b">")?;
113
114 Some(has_whitespace)
115}
116
/// A literal string, i.e. one written between `(` and `)`.
///
/// Fields, in order:
/// - the raw bytes between the outer parentheses (parentheses excluded),
/// - a flag that is set at parse time when the raw bytes contain a backslash
///   or a raw `\n`/`\r`, meaning they must be processed before use,
/// - a clone of the reader context the string was read with (consulted for
///   decryption when the content is retrieved).
#[derive(Debug, Clone)]
struct LiteralString<'a>(&'a [u8], bool, ReaderContext<'a>);
120
121impl<'a> LiteralString<'a> {
122 fn get(&self) -> Cow<'a, [u8]> {
124 let decoded = if self.1 {
125 let mut cleaned = vec![];
126 let mut r = Reader::new(self.0);
127
128 while let Some(byte) = r.read_byte() {
129 match byte {
130 b'\\' => {
131 let next = r.read_byte().unwrap();
132
133 if is_octal_digit(next) {
134 let second = r.read_byte();
135 let third = r.read_byte();
136
137 let bytes = match (second, third) {
138 (Some(n1), Some(n2)) => {
139 match (is_octal_digit(n1), is_octal_digit(n2)) {
140 (true, true) => [next, n1, n2],
141 (true, _) => {
142 r.jump(r.offset() - 1);
143 [b'0', next, n1]
144 }
145 _ => {
146 r.jump(r.offset() - 2);
147 [b'0', b'0', next]
148 }
149 }
150 }
151 (Some(n1), None) => {
152 if is_octal_digit(n1) {
153 [b'0', next, n1]
154 } else {
155 r.jump(r.offset() - 1);
156 [b'0', b'0', next]
157 }
158 }
159 _ => [b'0', b'0', next],
160 };
161
162 let str = std::str::from_utf8(&bytes).unwrap();
163
164 if let Ok(num) = u8::from_str_radix(str, 8) {
165 cleaned.push(num);
166 } else {
167 warn!("overflow occurred while parsing octal literal string");
168 }
169 } else {
170 match next {
171 b'n' => cleaned.push(0xA),
172 b'r' => cleaned.push(0xD),
173 b't' => cleaned.push(0x9),
174 b'b' => cleaned.push(0x8),
175 b'f' => cleaned.push(0xC),
176 b'(' => cleaned.push(b'('),
177 b')' => cleaned.push(b')'),
178 b'\\' => cleaned.push(b'\\'),
179 b'\n' | b'\r' => {
180 r.skip_eol_characters();
186 }
187 _ => cleaned.push(next),
188 }
189 }
190 }
191 b'\n' | b'\r' => {
196 cleaned.push(b'\n');
197 r.skip_eol_characters();
198 }
199 other => cleaned.push(other),
200 }
201 }
202
203 Cow::Owned(cleaned)
204 } else {
205 Cow::Borrowed(self.0)
206 };
207
208 if self.2.xref.needs_decryption(&self.2) {
209 if let Some(obj_number) = self.2.obj_number {
212 Cow::Owned(
213 self.2
214 .xref
215 .decrypt(obj_number, &decoded, DecryptionTarget::String)
216 .unwrap_or_default(),
217 )
218 } else {
219 decoded
220 }
221 } else {
222 decoded
223 }
224 }
225}
226
227impl Hash for LiteralString<'_> {
228 fn hash<H: Hasher>(&self, state: &mut H) {
229 self.0.hash(state);
230 self.1.hash(state);
231 }
232}
233
234impl PartialEq for LiteralString<'_> {
235 fn eq(&self, other: &Self) -> bool {
236 self.0.eq(other.0) && self.1.eq(&other.1)
237 }
238}
239
240impl Skippable for LiteralString<'_> {
241 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
242 parse_literal(r).map(|_| ())
243 }
244}
245
246impl<'a> Readable<'a> for LiteralString<'a> {
247 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
248 let start = r.offset();
249 let dirty = parse_literal(r)?;
250 let end = r.offset();
251
252 let result = r.range(start + 1..end - 1).unwrap();
254
255 Some(LiteralString(result, dirty, ctx.clone()))
256 }
257}
258
259impl<'a> TryFrom<Object<'a>> for LiteralString<'a> {
260 type Error = ();
261
262 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
263 match value {
264 Object::String(String(InnerString::Literal(l))) => Ok(l),
265 _ => Err(()),
266 }
267 }
268}
269
// Marks `LiteralString` as `ObjectLike`. The impl body is empty, so nothing is
// overridden here.
impl<'a> ObjectLike<'a> for LiteralString<'a> {}
271
272fn parse_literal(r: &mut Reader<'_>) -> Option<bool> {
273 r.forward_tag(b"(")?;
274 let mut bracket_counter = 1;
275 let mut dirty = false;
276
277 while bracket_counter > 0 {
278 let byte = r.read_byte()?;
279
280 match byte {
281 b'\\' => {
282 dirty = true;
283
284 let _ = r.read_byte()?;
285 }
286 b'(' => bracket_counter += 1,
287 b')' => bracket_counter -= 1,
288 b'\n' | b'\r' => dirty = true,
289 _ => {}
290 };
291 }
292
293 Some(dirty)
294}
295
/// The two syntactic forms a string object can take.
#[derive(Clone, Debug, PartialEq)]
enum InnerString<'a> {
    /// A hex-encoded string (`<...>`).
    Hex(HexString<'a>),
    /// A literal string (`(...)`).
    Literal(LiteralString<'a>),
}
301
/// A string object, wrapping either a hex-encoded or a literal string.
#[derive(Clone, Debug, PartialEq)]
pub struct String<'a>(InnerString<'a>);
305
306impl<'a> String<'a> {
307 pub fn get(&self) -> Cow<'a, [u8]> {
309 match &self.0 {
310 InnerString::Hex(hex) => Cow::Owned(hex.get()),
311 InnerString::Literal(lit) => lit.get(),
312 }
313 }
314}
315
316impl<'a> From<HexString<'a>> for String<'a> {
317 fn from(value: HexString<'a>) -> Self {
318 Self(InnerString::Hex(value))
319 }
320}
321
322impl<'a> From<LiteralString<'a>> for String<'a> {
323 fn from(value: LiteralString<'a>) -> Self {
324 Self(InnerString::Literal(value))
325 }
326}
327
// Invokes the crate's `object!` macro for `String`.
// NOTE(review): presumably this generates the glue between `String` and the
// `Object::String` variant — confirm against the macro definition.
object!(String<'a>, String);
329
330impl Skippable for String<'_> {
331 fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
332 match r.peek_byte()? {
333 b'<' => HexString::skip(r, is_content_stream),
334 b'(' => LiteralString::skip(r, is_content_stream),
335 _ => None,
336 }
337 }
338}
339
340impl<'a> Readable<'a> for String<'a> {
341 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
342 let inner = match r.peek_byte()? {
343 b'<' => InnerString::Hex(r.read::<HexString<'_>>(ctx)?),
344 b'(' => InnerString::Literal(r.read::<LiteralString<'_>>(ctx)?),
345 _ => return None,
346 };
347
348 Some(String(inner))
349 }
350}
351
/// Returns `true` if `byte` is one of the ASCII digits `0` through `7`.
fn is_octal_digit(byte: u8) -> bool {
    (b'0'..=b'7').contains(&byte)
}
355
#[cfg(test)]
mod tests {
    use crate::object::string::{HexString, LiteralString, String};
    use crate::reader::Reader;
    use crate::reader::ReaderExt;

    /// Parses `input` as a hex string without a reader context.
    fn hex(input: &str) -> Option<HexString<'_>> {
        Reader::new(input.as_bytes()).read_without_context::<HexString<'_>>()
    }

    /// Parses `input` as a literal string without a reader context.
    fn literal(input: &str) -> Option<LiteralString<'_>> {
        Reader::new(input.as_bytes()).read_without_context::<LiteralString<'_>>()
    }

    /// Parses `input` as a generic string without a reader context.
    fn string(input: &str) -> Option<String<'_>> {
        Reader::new(input.as_bytes()).read_without_context::<String<'_>>()
    }

    /// Parses `input` as a hex string and returns its content.
    fn hex_bytes(input: &str) -> Vec<u8> {
        hex(input).unwrap().get()
    }

    /// Parses `input` as a literal string and returns its content.
    fn literal_bytes(input: &str) -> Vec<u8> {
        literal(input).unwrap().get().to_vec()
    }

    /// Parses `input` as a generic string and returns its content.
    fn string_bytes(input: &str) -> Vec<u8> {
        string(input).unwrap().get().to_vec()
    }

    #[test]
    fn hex_string_empty() {
        assert_eq!(hex_bytes("<>"), Vec::<u8>::new());
    }

    #[test]
    fn hex_string_1() {
        assert_eq!(hex_bytes("<00010203>"), vec![0x00, 0x01, 0x02, 0x03]);
    }

    #[test]
    fn hex_string_2() {
        assert_eq!(hex_bytes("<000102034>"), vec![0x00, 0x01, 0x02, 0x03, 0x40]);
    }

    #[test]
    fn hex_string_trailing_1() {
        assert_eq!(
            hex_bytes("<000102034>dfgfg4"),
            vec![0x00, 0x01, 0x02, 0x03, 0x40]
        );
    }

    #[test]
    fn hex_string_trailing_2() {
        assert_eq!(hex_bytes("<1 3 4>dfgfg4"), vec![0x13, 0x40]);
    }

    #[test]
    fn hex_string_trailing_3() {
        assert_eq!(hex_bytes("<1>dfgfg4"), vec![0x10]);
    }

    #[test]
    fn hex_string_invalid_1() {
        assert!(hex("<").is_none());
    }

    #[test]
    fn hex_string_invalid_2() {
        assert!(hex("34AD").is_none());
    }

    #[test]
    fn literal_string_empty() {
        assert_eq!(literal_bytes("()"), b"".to_vec());
    }

    #[test]
    fn literal_string_1() {
        assert_eq!(literal_bytes("(Hi there.)"), b"Hi there.".to_vec());
    }

    #[test]
    fn literal_string_2() {
        // Only parsing is checked here; the content is not retrieved.
        assert!(literal("(Hi \\777)").is_some());
    }

    #[test]
    fn literal_string_3() {
        assert_eq!(literal_bytes("(Hi ) there.)"), b"Hi ".to_vec());
    }

    #[test]
    fn literal_string_4() {
        assert_eq!(literal_bytes("(Hi (()) there)"), b"Hi (()) there".to_vec());
    }

    #[test]
    fn literal_string_5() {
        assert_eq!(literal_bytes("(Hi \\()"), b"Hi (".to_vec());
    }

    #[test]
    fn literal_string_6() {
        assert_eq!(literal_bytes("(Hi \\\nthere)"), b"Hi there".to_vec());
    }

    #[test]
    fn literal_string_7() {
        assert_eq!(literal_bytes("(Hi \\05354)"), b"Hi +54".to_vec());
    }

    #[test]
    fn literal_string_8() {
        assert_eq!(string_bytes("(\\3)"), b"\x03".to_vec());
    }

    #[test]
    fn literal_string_9() {
        assert_eq!(string_bytes("(\\36)"), b"\x1e".to_vec());
    }

    #[test]
    fn literal_string_10() {
        assert_eq!(string_bytes("(\\36ab)"), b"\x1eab".to_vec());
    }

    #[test]
    fn literal_string_11() {
        assert_eq!(string_bytes("(\\00Y)"), b"\0Y".to_vec());
    }

    #[test]
    fn literal_string_12() {
        assert_eq!(string_bytes("(\\0Y)"), b"\0Y".to_vec());
    }

    #[test]
    fn literal_string_trailing() {
        assert_eq!(literal_bytes("(Hi there.)abcde"), b"Hi there.".to_vec());
    }

    #[test]
    fn literal_string_invalid() {
        assert_eq!(literal_bytes("(Hi \\778)"), b"Hi \x3F8".to_vec());
    }

    #[test]
    fn string_1() {
        assert_eq!(string_bytes("(Hi there.)"), b"Hi there.".to_vec());
    }

    #[test]
    fn string_2() {
        assert_eq!(string_bytes("<00010203>"), vec![0x00, 0x01, 0x02, 0x03]);
    }
}