1use crate::crypto::DecryptionTarget;
4use crate::filter::ascii_hex;
5use crate::object::Object;
6use crate::object::macros::object;
7use crate::reader::Reader;
8use crate::reader::{Readable, ReaderContext, ReaderExt, Skippable};
9use crate::trivia::is_white_space_character;
10use alloc::vec::Vec;
11use core::borrow::Borrow;
12use core::hash::{Hash, Hasher};
13use core::ops::Deref;
14use smallvec::SmallVec;
15
/// Backing storage for the bytes of a [`String`].
///
/// Strings that can be used exactly as they appear in the input are kept as a
/// borrowed slice; strings that had to be transformed (hex decoding, escape
/// processing, decryption) own their bytes.
#[derive(Clone)]
enum StringInner<'a> {
    /// Bytes borrowed from the input; no copy was necessary.
    Borrowed(&'a [u8]),
    /// Bytes produced by decoding; up to 23 bytes fit in the inline buffer.
    Owned(SmallVec<[u8; 23]>),
}
21
22impl AsRef<[u8]> for StringInner<'_> {
23 fn as_ref(&self) -> &[u8] {
24 match self {
25 Self::Borrowed(data) => data,
26 Self::Owned(data) => data,
27 }
28 }
29}
30
/// A string object, parsed from either the literal `(...)` or the
/// hexadecimal `<...>` syntax.
///
/// The wrapped bytes are the decoded content (escape sequences resolved, hex
/// digits converted), not the raw source text.
#[derive(Clone)]
pub struct String<'a>(StringInner<'a>);
34
35impl<'a> String<'a> {
36 pub fn as_bytes(&self) -> &[u8] {
38 self.as_ref()
39 }
40}
41
42impl Deref for String<'_> {
43 type Target = [u8];
44
45 fn deref(&self) -> &Self::Target {
46 self.as_ref()
47 }
48}
49
50impl AsRef<[u8]> for String<'_> {
51 fn as_ref(&self) -> &[u8] {
52 match &self.0 {
53 StringInner::Borrowed(data) => data,
54 StringInner::Owned(data) => data,
55 }
56 }
57}
58
59impl Borrow<[u8]> for String<'_> {
60 fn borrow(&self) -> &[u8] {
61 self.as_ref()
62 }
63}
64
65impl PartialEq for String<'_> {
66 fn eq(&self, other: &Self) -> bool {
67 self.as_ref() == other.as_ref()
68 }
69}
70
// Equality is plain byte-slice equality (see `PartialEq`), which is a total
// equivalence relation, so the `Eq` marker is sound.
impl Eq for String<'_> {}
72
73impl Hash for String<'_> {
74 fn hash<H: Hasher>(&self, state: &mut H) {
75 self.as_ref().hash(state);
76 }
77}
78
79impl core::fmt::Debug for String<'_> {
80 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
81 <[u8] as core::fmt::Debug>::fmt(self.as_ref(), f)
82 }
83}
84
// Invokes the `object!` macro imported from `crate::object::macros` for this type.
// NOTE(review): presumably this wires `String` into the `Object` enum under the
// `String` variant — confirm against the macro definition.
object!(String<'a>, String);
86
87impl Skippable for String<'_> {
88 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
89 match r.peek_byte()? {
90 b'<' => skip_hex(r),
91 b'(' => skip_literal(r),
92 _ => None,
93 }
94 }
95}
96
97impl<'a> Readable<'a> for String<'a> {
98 fn read(r: &mut Reader<'a>, ctx: &ReaderContext<'a>) -> Option<Self> {
99 let decoded = match r.peek_byte()? {
100 b'<' => StringInner::Owned(read_hex(r)?),
101 b'(' => read_literal(r)?,
102 _ => return None,
103 };
104
105 let final_data = if ctx.xref().needs_decryption(ctx) {
107 if let Some(obj_number) = ctx.obj_number() {
108 ctx.xref()
109 .decrypt(obj_number, decoded.as_ref(), DecryptionTarget::String)
110 .map(StringInner::from)
111 .unwrap_or(decoded)
112 } else {
113 decoded
114 }
115 } else {
116 decoded
117 };
118
119 Some(Self(final_data))
120 }
121}
122
123impl From<Vec<u8>> for StringInner<'_> {
124 fn from(value: Vec<u8>) -> Self {
125 Self::Owned(SmallVec::from_vec(value))
126 }
127}
128
129fn skip_hex(r: &mut Reader<'_>) -> Option<()> {
130 r.forward_tag(b"<")?;
131 while let Some(b) = r.peek_byte() {
132 let is_hex = b.is_ascii_hexdigit();
133 let is_whitespace = is_white_space_character(b);
134
135 if !is_hex && !is_whitespace {
136 break;
137 }
138
139 r.read_byte()?;
140 }
141 r.forward_tag(b">")?;
142
143 Some(())
144}
145
146fn read_hex(r: &mut Reader<'_>) -> Option<SmallVec<[u8; 23]>> {
147 let start = r.offset();
148 skip_hex(r)?;
149 let end = r.offset();
150
151 let raw = r.range(start + 1..end - 1)?;
153 let decoded = ascii_hex::decode_into(raw)?;
154
155 Some(decoded)
156}
157
158fn skip_literal(r: &mut Reader<'_>) -> Option<()> {
159 r.forward_tag(b"(")?;
160 let mut bracket_counter = 1;
161
162 while bracket_counter > 0 {
163 let byte = r.read_byte()?;
164
165 match byte {
166 b'\\' => {
167 let _ = r.read_byte()?;
168 }
169 b'(' => bracket_counter += 1,
170 b')' => bracket_counter -= 1,
171 _ => {}
172 };
173 }
174
175 Some(())
176}
177
178fn read_literal<'a>(r: &mut Reader<'a>) -> Option<StringInner<'a>> {
179 let start = r.offset();
180 skip_literal(r)?;
181 let end = r.offset();
182
183 let data = r.range(start + 1..end - 1)?;
185
186 if !data.iter().any(|b| matches!(b, b'\\' | b'\n' | b'\r')) {
187 return Some(StringInner::Borrowed(data));
188 }
189
190 let mut r = Reader::new(data);
191 let mut result = SmallVec::new();
192
193 while let Some(byte) = r.read_byte() {
194 match byte {
195 b'\\' => {
196 let next = r.read_byte()?;
197
198 if is_octal_digit(next) {
199 let second = r.read_byte();
200 let third = r.read_byte();
201
202 let bytes = match (second, third) {
203 (Some(n1), Some(n2)) => match (is_octal_digit(n1), is_octal_digit(n2)) {
204 (true, true) => [next, n1, n2],
205 (true, _) => {
206 r.jump(r.offset() - 1);
207 [b'0', next, n1]
208 }
209 _ => {
210 r.jump(r.offset() - 2);
211 [b'0', b'0', next]
212 }
213 },
214 (Some(n1), None) => {
215 if is_octal_digit(n1) {
216 [b'0', next, n1]
217 } else {
218 r.jump(r.offset() - 1);
219 [b'0', b'0', next]
220 }
221 }
222 _ => [b'0', b'0', next],
223 };
224
225 let str = core::str::from_utf8(&bytes).unwrap();
226
227 if let Ok(num) = u8::from_str_radix(str, 8) {
228 result.push(num);
229 } else {
230 warn!("overflow occurred while parsing octal literal string");
231 }
232 } else {
233 match next {
234 b'n' => result.push(0xA),
235 b'r' => result.push(0xD),
236 b't' => result.push(0x9),
237 b'b' => result.push(0x8),
238 b'f' => result.push(0xC),
239 b'(' => result.push(b'('),
240 b')' => result.push(b')'),
241 b'\\' => result.push(b'\\'),
242 b'\n' | b'\r' => {
243 r.skip_eol_characters();
249 }
250 _ => result.push(next),
251 }
252 }
253 }
254 b'(' | b')' => result.push(byte),
255 b'\n' | b'\r' => {
260 result.push(b'\n');
261 r.skip_eol_characters();
262 }
263 other => result.push(other),
264 }
265 }
266
267 Some(StringInner::Owned(result))
268}
269
/// Returns `true` if `byte` is an ASCII octal digit (`'0'` through `'7'`).
fn is_octal_digit(byte: u8) -> bool {
    (b'0'..=b'7').contains(&byte)
}
273
#[cfg(test)]
mod tests {
    use crate::object::String;
    use crate::reader::Reader;
    use crate::reader::ReaderExt;

    /// Parses `input` as a single string object, without a reader context.
    fn parse(input: &[u8]) -> Option<String<'_>> {
        Reader::new(input).read_without_context::<String<'_>>()
    }

    #[test]
    fn hex_string_empty() {
        assert_eq!(parse(b"<>").unwrap().as_bytes(), b"");
    }

    #[test]
    fn hex_string_1() {
        assert_eq!(
            parse(b"<00010203>").unwrap().as_bytes(),
            &[0x00, 0x01, 0x02, 0x03]
        );
    }

    // An odd number of digits is padded with a trailing zero nibble.
    #[test]
    fn hex_string_2() {
        assert_eq!(
            parse(b"<000102034>").unwrap().as_bytes(),
            &[0x00, 0x01, 0x02, 0x03, 0x40]
        );
    }

    #[test]
    fn hex_string_trailing_1() {
        assert_eq!(
            parse(b"<000102034>dfgfg4").unwrap().as_bytes(),
            &[0x00, 0x01, 0x02, 0x03, 0x40]
        );
    }

    // White space between digits is ignored.
    #[test]
    fn hex_string_trailing_2() {
        assert_eq!(parse(b"<1 3 4>dfgfg4").unwrap().as_bytes(), &[0x13, 0x40]);
    }

    #[test]
    fn hex_string_trailing_3() {
        assert_eq!(parse(b"<1>dfgfg4").unwrap().as_bytes(), &[0x10]);
    }

    #[test]
    fn hex_string_invalid_1() {
        assert!(parse(b"<").is_none());
    }

    #[test]
    fn hex_string_invalid_2() {
        assert!(parse(b"34AD").is_none());
    }

    #[test]
    fn literal_string_empty() {
        assert_eq!(parse(b"()").unwrap().as_bytes(), b"");
    }

    #[test]
    fn literal_string_1() {
        assert_eq!(parse(b"(Hi there.)").unwrap().as_bytes(), b"Hi there.");
    }

    #[test]
    fn literal_string_2() {
        assert!(parse(b"(Hi \\777)").is_some());
    }

    // The first unbalanced `)` terminates the string.
    #[test]
    fn literal_string_3() {
        assert_eq!(parse(b"(Hi ) there.)").unwrap().as_bytes(), b"Hi ");
    }

    // Balanced parentheses are part of the content.
    #[test]
    fn literal_string_4() {
        assert_eq!(
            parse(b"(Hi (()) there)").unwrap().as_bytes(),
            b"Hi (()) there"
        );
    }

    #[test]
    fn literal_string_5() {
        assert_eq!(parse(b"(Hi \\()").unwrap().as_bytes(), b"Hi (");
    }

    // Backslash followed by a newline is a line continuation.
    #[test]
    fn literal_string_6() {
        assert_eq!(parse(b"(Hi \\\nthere)").unwrap().as_bytes(), b"Hi there");
    }

    // An octal escape takes at most three digits.
    #[test]
    fn literal_string_7() {
        assert_eq!(parse(b"(Hi \\05354)").unwrap().as_bytes(), b"Hi +54");
    }

    #[test]
    fn literal_string_8() {
        assert_eq!(parse(b"(\\3)").unwrap().as_bytes(), b"\x03");
    }

    #[test]
    fn literal_string_9() {
        assert_eq!(parse(b"(\\36)").unwrap().as_bytes(), b"\x1e");
    }

    #[test]
    fn literal_string_10() {
        assert_eq!(parse(b"(\\36ab)").unwrap().as_bytes(), b"\x1eab");
    }

    #[test]
    fn literal_string_11() {
        assert_eq!(parse(b"(\\00Y)").unwrap().as_bytes(), b"\0Y");
    }

    #[test]
    fn literal_string_12() {
        assert_eq!(parse(b"(\\0Y)").unwrap().as_bytes(), b"\0Y");
    }

    #[test]
    fn literal_string_trailing() {
        assert_eq!(
            parse(b"(Hi there.)abcde").unwrap().as_bytes(),
            b"Hi there."
        );
    }

    // `8` is not an octal digit, so the escape ends after `\77`.
    #[test]
    fn literal_string_invalid() {
        assert_eq!(parse(b"(Hi \\778)").unwrap().as_bytes(), b"Hi \x3F8");
    }

    #[test]
    fn string_1() {
        assert_eq!(parse(b"(Hi there.)").unwrap().as_bytes(), b"Hi there.");
    }

    #[test]
    fn string_2() {
        assert_eq!(
            parse(b"<00010203>").unwrap().as_bytes(),
            &[0x00, 0x01, 0x02, 0x03]
        );
    }
}