1use crate::filter::ascii_hex::decode_hex_string;
4use crate::object::macros::object;
5use crate::object::{Object, ObjectLike};
6use crate::reader::{Readable, Reader, ReaderContext, Skippable};
7use crate::trivia::is_white_space_character;
8use std::borrow::Cow;
9
/// A hex string (`<...>`) that borrows its still-encoded bytes from the input.
///
/// Field `0` is the raw content found between the `<` and `>` delimiters.
/// Field `1` is a "dirty" flag: it is set when that content contains white
/// space or has an odd number of bytes, i.e. when it has to be cleaned up
/// before it can be handed to the hex decoder.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct HexString<'a>(&'a [u8], bool);
13
14impl HexString<'_> {
15 fn get(&self) -> Vec<u8> {
17 if self.1 {
18 let mut cleaned = Vec::with_capacity(self.0.len() + 1);
19
20 for b in self.0.iter().copied() {
21 if !is_white_space_character(b) {
22 cleaned.push(b);
23 }
24 }
25
26 if cleaned.len() % 2 != 0 {
27 cleaned.push(b'0');
28 }
29
30 decode_hex_string(&cleaned).unwrap()
32 } else {
33 decode_hex_string(self.0).unwrap()
35 }
36 }
37}
38
39impl Skippable for HexString<'_> {
40 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
41 parse_hex(r).map(|_| {})
42 }
43}
44
45impl<'a> Readable<'a> for HexString<'a> {
46 fn read(r: &mut Reader<'a>, _: ReaderContext) -> Option<Self> {
47 let start = r.offset();
48 let mut dirty = parse_hex(r)?;
49 let end = r.offset();
50
51 let result = r.range(start + 1..end - 1).unwrap();
53 dirty |= result.len() % 2 != 0;
54
55 Some(HexString(result, dirty))
56 }
57}
58
59impl<'a> TryFrom<Object<'a>> for HexString<'a> {
60 type Error = ();
61
62 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
63 match value {
64 Object::String(String(InnerString::Hex(h))) => Ok(h),
65 _ => Err(()),
66 }
67 }
68}
69
// Opts `HexString` into the `ObjectLike` trait. The body is empty, so no
// trait items are provided or overridden here.
impl<'a> ObjectLike<'a> for HexString<'a> {}
71
72fn parse_hex(r: &mut Reader<'_>) -> Option<bool> {
73 let mut has_whitespace = false;
74
75 r.forward_tag(b"<")?;
76 while let Some(b) = r.peek_byte() {
77 let is_hex = b.is_ascii_hexdigit();
78 let is_whitespace = is_white_space_character(b);
79 has_whitespace |= is_whitespace;
80
81 if !is_hex && !is_whitespace {
82 break;
83 }
84
85 r.read_byte()?;
86 }
87 r.forward_tag(b">")?;
88
89 Some(has_whitespace)
90}
91
/// A literal string (`(...)`) that borrows its still-escaped bytes from the
/// input.
///
/// Field `0` is the raw content found between the outermost parentheses.
/// Field `1` is a "dirty" flag: it is set when that content contains a
/// backslash or an end-of-line byte, i.e. when it has to be decoded before
/// use.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct LiteralString<'a>(&'a [u8], bool);
95
96impl<'a> LiteralString<'a> {
97 fn get(&self) -> Cow<'a, [u8]> {
99 if self.1 {
100 let mut cleaned = vec![];
101 let mut r = Reader::new(self.0);
102
103 while let Some(byte) = r.read_byte() {
104 match byte {
105 b'\\' => {
106 let next = r.read_byte().unwrap();
107
108 if is_octal_digit(next) {
109 let second = r.read_byte();
110 let third = r.read_byte();
111
112 match (second, third) {
113 (Some(n1), Some(n2)) => {
114 if is_octal_digit(n1) && is_octal_digit(n2) {
115 let bytes = [next, n1, n2];
116 let str = std::str::from_utf8(&bytes).unwrap();
117 let num = u8::from_str_radix(str, 8).unwrap();
118 cleaned.push(num);
119 } else {
120 cleaned.push(next);
122 cleaned.push(n1);
123 cleaned.push(n2);
124 }
125 }
126 (Some(n1), None) => {
127 cleaned.push(next);
128 cleaned.push(n1);
129 }
130 _ => cleaned.push(next),
131 }
132 } else {
133 match next {
134 b'n' => cleaned.push(0xA),
135 b'r' => cleaned.push(0xD),
136 b't' => cleaned.push(0x9),
137 b'b' => cleaned.push(0x8),
138 b'f' => cleaned.push(0xC),
139 b'(' => cleaned.push(b'('),
140 b')' => cleaned.push(b')'),
141 b'\\' => cleaned.push(b'\\'),
142 b'\n' | b'\r' => {
143 r.skip_eol_characters();
149 }
150 _ => cleaned.push(next),
151 }
152 }
153 }
154 b'\n' | b'\r' => {
159 cleaned.push(b'\n');
160 r.skip_eol_characters();
161 }
162 other => cleaned.push(other),
163 }
164 }
165
166 Cow::Owned(cleaned)
167 } else {
168 Cow::Borrowed(self.0)
169 }
170 }
171}
172
173impl Skippable for LiteralString<'_> {
174 fn skip(r: &mut Reader<'_>, _: bool) -> Option<()> {
175 parse_literal(r).map(|_| ())
176 }
177}
178
179impl<'a> Readable<'a> for LiteralString<'a> {
180 fn read(r: &mut Reader<'a>, _: ReaderContext) -> Option<Self> {
181 let start = r.offset();
182 let dirty = parse_literal(r)?;
183 let end = r.offset();
184
185 let result = r.range(start + 1..end - 1).unwrap();
187
188 Some(LiteralString(result, dirty))
189 }
190}
191
192impl<'a> TryFrom<Object<'a>> for LiteralString<'a> {
193 type Error = ();
194
195 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
196 match value {
197 Object::String(String(InnerString::Literal(l))) => Ok(l),
198 _ => Err(()),
199 }
200 }
201}
202
// Opts `LiteralString` into the `ObjectLike` trait. The body is empty, so no
// trait items are provided or overridden here.
impl<'a> ObjectLike<'a> for LiteralString<'a> {}
204
205fn parse_literal(r: &mut Reader<'_>) -> Option<bool> {
206 r.forward_tag(b"(")?;
207 let mut bracket_counter = 1;
208 let mut dirty = false;
209
210 while bracket_counter > 0 {
211 let byte = r.read_byte()?;
212
213 match byte {
214 b'\\' => {
215 dirty = true;
216
217 let _ = r.read_byte()?;
218 }
219 b'(' => bracket_counter += 1,
220 b')' => bracket_counter -= 1,
221 b'\n' | b'\r' => dirty = true,
222 _ => {}
223 };
224 }
225
226 Some(dirty)
227}
228
229#[derive(Clone, Debug, PartialEq)]
230enum InnerString<'a> {
231 Hex(HexString<'a>),
232 Literal(LiteralString<'a>),
233}
234
235#[derive(Clone, Debug, PartialEq)]
237pub struct String<'a>(InnerString<'a>);
238
239impl<'a> String<'a> {
240 pub fn get(&self) -> Cow<'a, [u8]> {
242 match &self.0 {
243 InnerString::Hex(hex) => Cow::Owned(hex.get()),
244 InnerString::Literal(lit) => lit.get(),
245 }
246 }
247}
248
249impl<'a> From<HexString<'a>> for String<'a> {
250 fn from(value: HexString<'a>) -> Self {
251 Self(InnerString::Hex(value))
252 }
253}
254
255impl<'a> From<LiteralString<'a>> for String<'a> {
256 fn from(value: LiteralString<'a>) -> Self {
257 Self(InnerString::Literal(value))
258 }
259}
260
// Invokes the crate-local `object!` macro for `String<'a>`, passing `String`
// as its second argument. The expansion lives in `crate::object::macros` and
// is not visible here; presumably it connects `String` to the matching
// `Object` variant — verify against the macro definition.
object!(String<'a>, String);
262
263impl Skippable for String<'_> {
264 fn skip(r: &mut Reader<'_>, is_content_stream: bool) -> Option<()> {
265 match r.peek_byte()? {
266 b'<' => HexString::skip(r, is_content_stream),
267 b'(' => LiteralString::skip(r, is_content_stream),
268 _ => None,
269 }
270 }
271}
272
273impl<'a> Readable<'a> for String<'a> {
274 fn read(r: &mut Reader<'a>, _: ReaderContext) -> Option<Self> {
275 let inner = match r.peek_byte()? {
276 b'<' => InnerString::Hex(r.read_without_context::<HexString>()?),
277 b'(' => InnerString::Literal(r.read_without_context::<LiteralString>()?),
278 _ => return None,
279 };
280
281 Some(String(inner))
282 }
283}
284
/// Returns `true` if `byte` is one of the ASCII digits `0` through `7`.
fn is_octal_digit(byte: u8) -> bool {
    (b'0'..=b'7').contains(&byte)
}
288
#[cfg(test)]
mod tests {
    use crate::object::string::{HexString, LiteralString, String};
    use crate::reader::Reader;

    /// Reads a hex string from the start of `input`.
    fn hex(input: &str) -> Option<HexString<'_>> {
        Reader::new(input.as_bytes()).read_without_context::<HexString>()
    }

    /// Reads a literal string from the start of `input`.
    fn literal(input: &str) -> Option<LiteralString<'_>> {
        Reader::new(input.as_bytes()).read_without_context::<LiteralString>()
    }

    /// Reads a string of either form from the start of `input`.
    fn string(input: &str) -> Option<String<'_>> {
        Reader::new(input.as_bytes()).read_without_context::<String>()
    }

    #[test]
    fn hex_string_empty() {
        assert_eq!(hex("<>").unwrap().get(), vec![]);
    }

    #[test]
    fn hex_string_1() {
        assert_eq!(
            hex("<00010203>").unwrap().get(),
            vec![0x00, 0x01, 0x02, 0x03]
        );
    }

    #[test]
    fn hex_string_2() {
        // Odd digit count: the missing last digit counts as `0`.
        assert_eq!(
            hex("<000102034>").unwrap().get(),
            vec![0x00, 0x01, 0x02, 0x03, 0x40]
        );
    }

    #[test]
    fn hex_string_trailing_1() {
        assert_eq!(
            hex("<000102034>dfgfg4").unwrap().get(),
            vec![0x00, 0x01, 0x02, 0x03, 0x40]
        );
    }

    #[test]
    fn hex_string_trailing_2() {
        // White space between digits is ignored.
        assert_eq!(hex("<1 3 4>dfgfg4").unwrap().get(), vec![0x13, 0x40]);
    }

    #[test]
    fn hex_string_trailing_3() {
        assert_eq!(hex("<1>dfgfg4").unwrap().get(), vec![0x10]);
    }

    #[test]
    fn hex_string_invalid_1() {
        assert!(hex("<").is_none());
    }

    #[test]
    fn hex_string_invalid_2() {
        assert!(hex("34AD").is_none());
    }

    #[test]
    fn literal_string_empty() {
        assert_eq!(literal("()").unwrap().get().to_vec(), b"".to_vec());
    }

    #[test]
    fn literal_string_1() {
        assert_eq!(
            literal("(Hi there.)").unwrap().get().to_vec(),
            b"Hi there.".to_vec()
        );
    }

    #[test]
    fn literal_string_2() {
        // Only checks that the string can be read; it is not decoded here.
        assert!(literal("(Hi \\777)").is_some());
    }

    #[test]
    fn literal_string_3() {
        // The first unbalanced `)` ends the string.
        assert_eq!(
            literal("(Hi ) there.)").unwrap().get().to_vec(),
            b"Hi ".to_vec()
        );
    }

    #[test]
    fn literal_string_4() {
        assert_eq!(
            literal("(Hi (()) there)").unwrap().get().to_vec(),
            b"Hi (()) there".to_vec()
        );
    }

    #[test]
    fn literal_string_5() {
        assert_eq!(
            literal("(Hi \\()").unwrap().get().to_vec(),
            b"Hi (".to_vec()
        );
    }

    #[test]
    fn literal_string_6() {
        // A backslash before a line break acts as a line continuation.
        assert_eq!(
            literal("(Hi \\\nthere)").unwrap().get().to_vec(),
            b"Hi there".to_vec()
        );
    }

    #[test]
    fn literal_string_7() {
        assert_eq!(
            literal("(Hi \\05354)").unwrap().get().to_vec(),
            b"Hi +54".to_vec()
        );
    }

    #[test]
    fn literal_string_8() {
        assert_eq!(string("(\\3)").unwrap().get(), b"3".to_vec())
    }

    #[test]
    fn literal_string_9() {
        assert_eq!(string("(\\36)").unwrap().get(), b"36".to_vec())
    }

    #[test]
    fn literal_string_10() {
        assert_eq!(string("(\\36ab)").unwrap().get(), b"36ab".to_vec())
    }

    #[test]
    fn literal_string_trailing() {
        assert_eq!(
            literal("(Hi there.)abcde").unwrap().get().to_vec(),
            b"Hi there.".to_vec()
        );
    }

    #[test]
    fn literal_string_invalid() {
        // `8` is not an octal digit, so the escape is kept as plain digits.
        assert_eq!(
            literal("(Hi \\778)").unwrap().get().to_vec(),
            b"Hi 778".to_vec()
        );
    }

    #[test]
    fn string_1() {
        assert_eq!(
            string("(Hi there.)").unwrap().get().to_vec(),
            b"Hi there.".to_vec()
        );
    }

    #[test]
    fn string_2() {
        assert_eq!(
            string("<00010203>").unwrap().get(),
            vec![0x00, 0x01, 0x02, 0x03]
        );
    }
}