1use crate::filter::ascii_hex::decode_hex_string;
4use crate::object::macros::object;
5use crate::object::{Object, ObjectLike};
6use crate::reader::{Readable, Reader, ReaderContext, Skippable};
7use crate::trivia::is_white_space_character;
8use log::warn;
9use std::borrow::Cow;
10
11#[derive(PartialEq, Eq, Copy, Clone, Debug)]
13struct HexString<'a>(&'a [u8], bool);
14
15impl HexString<'_> {
16 fn get(&self) -> Vec<u8> {
18 if self.1 {
19 let mut cleaned = Vec::with_capacity(self.0.len() + 1);
20
21 for b in self.0.iter().copied() {
22 if !is_white_space_character(b) {
23 cleaned.push(b);
24 }
25 }
26
27 if cleaned.len() % 2 != 0 {
28 cleaned.push(b'0');
29 }
30
31 decode_hex_string(&cleaned).unwrap()
33 } else {
34 decode_hex_string(self.0).unwrap()
36 }
37 }
38}
39
40impl Skippable for HexString<'_> {
41 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
42 parse_hex(r).map(|_| {})
43 }
44}
45
46impl<'a> Readable<'a> for HexString<'a> {
47 fn read(r: &mut Reader<'a>, _: ReaderContext) -> Option<Self> {
48 let start = r.offset();
49 let mut dirty = parse_hex(r)?;
50 let end = r.offset();
51
52 let result = r.range(start + 1..end - 1).unwrap();
54 dirty |= result.len() % 2 != 0;
55
56 Some(HexString(result, dirty))
57 }
58}
59
60impl<'a> TryFrom<Object<'a>> for HexString<'a> {
61 type Error = ();
62
63 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
64 match value {
65 Object::String(String(InnerString::Hex(h))) => Ok(h),
66 _ => Err(()),
67 }
68 }
69}
70
71impl<'a> ObjectLike<'a> for HexString<'a> {}
72
73fn parse_hex(r: &mut Reader<'_>) -> Option<bool> {
74 let mut has_whitespace = false;
75
76 r.forward_tag(b"<")?;
77 while let Some(b) = r.peek_byte() {
78 let is_hex = b.is_ascii_hexdigit();
79 let is_whitespace = is_white_space_character(b);
80 has_whitespace |= is_whitespace;
81
82 if !is_hex && !is_whitespace {
83 break;
84 }
85
86 r.read_byte()?;
87 }
88 r.forward_tag(b">")?;
89
90 Some(has_whitespace)
91}
92
93#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
95struct LiteralString<'a>(&'a [u8], bool);
96
97impl<'a> LiteralString<'a> {
98 fn get(&self) -> Cow<'a, [u8]> {
100 if self.1 {
101 let mut cleaned = vec![];
102 let mut r = Reader::new(self.0);
103
104 while let Some(byte) = r.read_byte() {
105 match byte {
106 b'\\' => {
107 let next = r.read_byte().unwrap();
108
109 if is_octal_digit(next) {
110 let second = r.read_byte();
111 let third = r.read_byte();
112
113 match (second, third) {
114 (Some(n1), Some(n2)) => {
115 if is_octal_digit(n1) && is_octal_digit(n2) {
116 let bytes = [next, n1, n2];
117 let str = std::str::from_utf8(&bytes).unwrap();
118
119 if let Ok(num) = u8::from_str_radix(str, 8) {
120 cleaned.push(num);
121 } else {
122 warn!(
123 "overflow occurred while parsing octal literal string"
124 );
125 }
126 } else {
127 cleaned.push(next);
129 cleaned.push(n1);
130 cleaned.push(n2);
131 }
132 }
133 (Some(n1), None) => {
134 cleaned.push(next);
135 cleaned.push(n1);
136 }
137 _ => cleaned.push(next),
138 }
139 } else {
140 match next {
141 b'n' => cleaned.push(0xA),
142 b'r' => cleaned.push(0xD),
143 b't' => cleaned.push(0x9),
144 b'b' => cleaned.push(0x8),
145 b'f' => cleaned.push(0xC),
146 b'(' => cleaned.push(b'('),
147 b')' => cleaned.push(b')'),
148 b'\\' => cleaned.push(b'\\'),
149 b'\n' | b'\r' => {
150 r.skip_eol_characters();
156 }
157 _ => cleaned.push(next),
158 }
159 }
160 }
161 b'\n' | b'\r' => {
166 cleaned.push(b'\n');
167 r.skip_eol_characters();
168 }
169 other => cleaned.push(other),
170 }
171 }
172
173 Cow::Owned(cleaned)
174 } else {
175 Cow::Borrowed(self.0)
176 }
177 }
178}
179
180impl Skippable for LiteralString<'_> {
181 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
182 parse_literal(r).map(|_| ())
183 }
184}
185
186impl<'a> Readable<'a> for LiteralString<'a> {
187 fn read(r: &mut Reader<'a>, _: ReaderContext) -> Option<Self> {
188 let start = r.offset();
189 let dirty = parse_literal(r)?;
190 let end = r.offset();
191
192 let result = r.range(start + 1..end - 1).unwrap();
194
195 Some(LiteralString(result, dirty))
196 }
197}
198
199impl<'a> TryFrom<Object<'a>> for LiteralString<'a> {
200 type Error = ();
201
202 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
203 match value {
204 Object::String(String(InnerString::Literal(l))) => Ok(l),
205 _ => Err(()),
206 }
207 }
208}
209
210impl<'a> ObjectLike<'a> for LiteralString<'a> {}
211
212fn parse_literal(r: &mut Reader<'_>) -> Option<bool> {
213 r.forward_tag(b"(")?;
214 let mut bracket_counter = 1;
215 let mut dirty = false;
216
217 while bracket_counter > 0 {
218 let byte = r.read_byte()?;
219
220 match byte {
221 b'\\' => {
222 dirty = true;
223
224 let _ = r.read_byte()?;
225 }
226 b'(' => bracket_counter += 1,
227 b')' => bracket_counter -= 1,
228 b'\n' | b'\r' => dirty = true,
229 _ => {}
230 };
231 }
232
233 Some(dirty)
234}
235
236#[derive(Clone, Debug, PartialEq)]
237enum InnerString<'a> {
238 Hex(HexString<'a>),
239 Literal(LiteralString<'a>),
240}
241
242#[derive(Clone, Debug, PartialEq)]
244pub struct String<'a>(InnerString<'a>);
245
246impl<'a> String<'a> {
247 pub fn get(&self) -> Cow<'a, [u8]> {
249 match &self.0 {
250 InnerString::Hex(hex) => Cow::Owned(hex.get()),
251 InnerString::Literal(lit) => lit.get(),
252 }
253 }
254}
255
256impl<'a> From<HexString<'a>> for String<'a> {
257 fn from(value: HexString<'a>) -> Self {
258 Self(InnerString::Hex(value))
259 }
260}
261
262impl<'a> From<LiteralString<'a>> for String<'a> {
263 fn from(value: LiteralString<'a>) -> Self {
264 Self(InnerString::Literal(value))
265 }
266}
267
268object!(String<'a>, String);
269
270impl Skippable for String<'_> {
271 fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
272 match r.peek_byte()? {
273 b'<' => HexString::skip(r, is_content_stream),
274 b'(' => LiteralString::skip(r, is_content_stream),
275 _ => None,
276 }
277 }
278}
279
280impl<'a> Readable<'a> for String<'a> {
281 fn read(r: &mut Reader<'a>, _: ReaderContext) -> Option<Self> {
282 let inner = match r.peek_byte()? {
283 b'<' => InnerString::Hex(r.read_without_context::<HexString>()?),
284 b'(' => InnerString::Literal(r.read_without_context::<LiteralString>()?),
285 _ => return None,
286 };
287
288 Some(String(inner))
289 }
290}
291
/// Returns `true` if `byte` is one of the ASCII digits `0` through `7`.
fn is_octal_digit(byte: u8) -> bool {
    (b'0'..=b'7').contains(&byte)
}
295
#[cfg(test)]
mod tests {
    use crate::object::string::{HexString, LiteralString, String};
    use crate::reader::Reader;

    /// Parses `input` as a hex string and returns its decoded bytes, or `None`
    /// if parsing fails.
    fn hex(input: &str) -> Option<Vec<u8>> {
        Reader::new(input.as_bytes())
            .read_without_context::<HexString>()
            .map(|parsed| parsed.get())
    }

    /// Parses `input` as a literal string and returns its decoded bytes, or
    /// `None` if parsing fails.
    fn literal(input: &str) -> Option<Vec<u8>> {
        Reader::new(input.as_bytes())
            .read_without_context::<LiteralString>()
            .map(|parsed| parsed.get().into_owned())
    }

    /// Parses `input` as a string of either form and returns its decoded
    /// bytes, or `None` if parsing fails.
    fn string(input: &str) -> Option<Vec<u8>> {
        Reader::new(input.as_bytes())
            .read_without_context::<String>()
            .map(|parsed| parsed.get().into_owned())
    }

    #[test]
    fn hex_string_empty() {
        assert_eq!(hex("<>"), Some(vec![]));
    }

    #[test]
    fn hex_string_1() {
        assert_eq!(hex("<00010203>"), Some(vec![0x00, 0x01, 0x02, 0x03]));
    }

    #[test]
    fn hex_string_2() {
        assert_eq!(
            hex("<000102034>"),
            Some(vec![0x00, 0x01, 0x02, 0x03, 0x40])
        );
    }

    #[test]
    fn hex_string_trailing_1() {
        assert_eq!(
            hex("<000102034>dfgfg4"),
            Some(vec![0x00, 0x01, 0x02, 0x03, 0x40])
        );
    }

    #[test]
    fn hex_string_trailing_2() {
        assert_eq!(hex("<1 3 4>dfgfg4"), Some(vec![0x13, 0x40]));
    }

    #[test]
    fn hex_string_trailing_3() {
        assert_eq!(hex("<1>dfgfg4"), Some(vec![0x10]));
    }

    #[test]
    fn hex_string_invalid_1() {
        assert!(hex("<").is_none());
    }

    #[test]
    fn hex_string_invalid_2() {
        assert!(hex("34AD").is_none());
    }

    #[test]
    fn literal_string_empty() {
        assert_eq!(literal("()"), Some(b"".to_vec()));
    }

    #[test]
    fn literal_string_1() {
        assert_eq!(literal("(Hi there.)"), Some(b"Hi there.".to_vec()));
    }

    #[test]
    fn literal_string_2() {
        // Only parsing is checked here, not the decoded contents.
        assert!(
            Reader::new("(Hi \\777)".as_bytes())
                .read_without_context::<LiteralString>()
                .is_some()
        );
    }

    #[test]
    fn literal_string_3() {
        assert_eq!(literal("(Hi ) there.)"), Some(b"Hi ".to_vec()));
    }

    #[test]
    fn literal_string_4() {
        assert_eq!(
            literal("(Hi (()) there)"),
            Some(b"Hi (()) there".to_vec())
        );
    }

    #[test]
    fn literal_string_5() {
        assert_eq!(literal("(Hi \\()"), Some(b"Hi (".to_vec()));
    }

    #[test]
    fn literal_string_6() {
        assert_eq!(literal("(Hi \\\nthere)"), Some(b"Hi there".to_vec()));
    }

    #[test]
    fn literal_string_7() {
        assert_eq!(literal("(Hi \\05354)"), Some(b"Hi +54".to_vec()));
    }

    #[test]
    fn literal_string_8() {
        assert_eq!(string("(\\3)"), Some(b"3".to_vec()));
    }

    #[test]
    fn literal_string_9() {
        assert_eq!(string("(\\36)"), Some(b"36".to_vec()));
    }

    #[test]
    fn literal_string_10() {
        assert_eq!(string("(\\36ab)"), Some(b"36ab".to_vec()));
    }

    #[test]
    fn literal_string_trailing() {
        assert_eq!(literal("(Hi there.)abcde"), Some(b"Hi there.".to_vec()));
    }

    #[test]
    fn literal_string_invalid() {
        assert_eq!(literal("(Hi \\778)"), Some(b"Hi 778".to_vec()));
    }

    #[test]
    fn string_1() {
        assert_eq!(string("(Hi there.)"), Some(b"Hi there.".to_vec()));
    }

    #[test]
    fn string_2() {
        assert_eq!(string("<00010203>"), Some(vec![0x00, 0x01, 0x02, 0x03]));
    }
}