1use crate::object::macros::object;
4use crate::object::{Object, ObjectLike};
5use crate::reader::{Readable, Reader, Skippable};
6use crate::trivia::is_white_space_character;
7use crate::xref::XRef;
8use std::borrow::Cow;
9
10#[derive(PartialEq, Eq, Copy, Clone, Debug)]
12pub struct HexString<'a>(&'a [u8], bool);
13
14impl HexString<'_> {
15 pub fn get(&self) -> Vec<u8> {
17 if self.1 {
18 let mut cleaned = Vec::with_capacity(self.0.len() + 1);
19
20 for b in self.0.iter().copied() {
21 if !is_white_space_character(b) {
22 cleaned.push(b);
23 }
24 }
25
26 if cleaned.len() % 2 != 0 {
27 cleaned.push(b'0');
28 }
29
30 hex::decode(cleaned).unwrap()
32 } else {
33 hex::decode(self.0).unwrap()
35 }
36 }
37}
38
39impl Skippable for HexString<'_> {
40 fn skip<const PLAIN: bool>(r: &mut Reader<'_>) -> Option<()> {
41 parse_hex(r).map(|_| {})
42 }
43}
44
45impl<'a> Readable<'a> for HexString<'a> {
46 fn read<const PLAIN: bool>(r: &mut Reader<'a>, _: &'a XRef) -> Option<Self> {
47 let start = r.offset();
48 let mut dirty = parse_hex(r)?;
49 let end = r.offset();
50
51 let result = r.range(start + 1..end - 1).unwrap();
53 dirty |= result.len() % 2 != 0;
54
55 Some(HexString(result, dirty))
56 }
57}
58
59impl<'a> TryFrom<Object<'a>> for HexString<'a> {
60 type Error = ();
61
62 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
63 match value {
64 Object::String(String(InnerString::Hex(h))) => Ok(h),
65 _ => Err(()),
66 }
67 }
68}
69
70impl<'a> ObjectLike<'a> for HexString<'a> {}
71
72fn parse_hex(r: &mut Reader<'_>) -> Option<bool> {
73 let mut has_whitespace = false;
74
75 r.forward_tag(b"<")?;
76 while let Some(b) = r.peek_byte() {
77 let is_hex = b.is_ascii_hexdigit();
78 let is_whitespace = is_white_space_character(b);
79 has_whitespace |= is_whitespace;
80
81 if !is_hex && !is_whitespace {
82 break;
83 }
84
85 r.read_byte()?;
86 }
87 r.forward_tag(b">")?;
88
89 Some(has_whitespace)
90}
91
92#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
94pub struct LiteralString<'a>(&'a [u8], bool);
95
96impl<'a> LiteralString<'a> {
97 pub fn get(&self) -> Cow<'a, [u8]> {
99 if self.1 {
100 let mut cleaned = vec![];
101 let mut r = Reader::new(self.0);
102
103 while let Some(byte) = r.read_byte() {
104 match byte {
105 b'\\' => {
106 let next = r.read_byte().unwrap();
107
108 if is_octal_digit(next) {
109 let second = r.read_byte().unwrap();
110 let third = r.read_byte().unwrap();
111 let bytes = [next, second, third];
112 let str = std::str::from_utf8(&bytes).unwrap();
113 let num = u8::from_str_radix(str, 8).unwrap();
114 cleaned.push(num);
115 } else {
116 match next {
117 b'n' => cleaned.push(0xA),
118 b'r' => cleaned.push(0xD),
119 b't' => cleaned.push(0x9),
120 b'b' => cleaned.push(0x8),
121 b'f' => cleaned.push(0xC),
122 b'(' => cleaned.push(b'('),
123 b')' => cleaned.push(b')'),
124 b'\\' => cleaned.push(b'\\'),
125 b'\n' | b'\r' => {
126 r.skip_eol_characters();
132 }
133 _ => unreachable!(),
134 }
135 }
136 }
137 b'\n' | b'\r' => {
142 cleaned.push(b'\n');
143 r.skip_eol_characters();
144 }
145 other => cleaned.push(other),
146 }
147 }
148
149 Cow::Owned(cleaned)
150 } else {
151 Cow::Borrowed(self.0)
152 }
153 }
154}
155
156impl Skippable for LiteralString<'_> {
157 fn skip<const PLAIN: bool>(r: &mut Reader<'_>) -> Option<()> {
158 parse_literal(r).map(|_| ())
159 }
160}
161
162impl<'a> Readable<'a> for LiteralString<'a> {
163 fn read<const PLAIN: bool>(r: &mut Reader<'a>, _: &XRef) -> Option<Self> {
164 let start = r.offset();
165 let dirty = parse_literal(r)?;
166 let end = r.offset();
167
168 let result = r.range(start + 1..end - 1).unwrap();
170
171 Some(LiteralString(result, dirty))
172 }
173}
174
175impl<'a> TryFrom<Object<'a>> for LiteralString<'a> {
176 type Error = ();
177
178 fn try_from(value: Object<'a>) -> Result<Self, Self::Error> {
179 match value {
180 Object::String(String(InnerString::Literal(l))) => Ok(l),
181 _ => Err(()),
182 }
183 }
184}
185
186impl<'a> ObjectLike<'a> for LiteralString<'a> {}
187
188fn parse_literal(r: &mut Reader<'_>) -> Option<bool> {
189 r.forward_tag(b"(")?;
190 let mut bracket_counter = 1;
191 let mut dirty = false;
192
193 while bracket_counter > 0 {
194 let byte = r.read_byte()?;
195
196 match byte {
197 b'\\' => {
198 dirty = true;
199
200 let next = r.read_byte()?;
201 if is_octal_digit(next) {
202 r.eat(|b| is_octal_digit(b))?;
203 r.eat(|b| is_octal_digit(b))?;
204 } else if !matches!(
205 next,
206 b'n' | b'r' | b't' | b'b' | b'f' | b'(' | b')' | b'\\' | b'\n' | b'\r'
207 ) {
208 return None;
209 }
210 }
211 b'(' => bracket_counter += 1,
212 b')' => bracket_counter -= 1,
213 b'\n' | b'\r' => dirty = true,
214 _ => {}
215 };
216 }
217
218 Some(dirty)
219}
220
221#[derive(Clone, Debug, PartialEq)]
222enum InnerString<'a> {
223 Hex(HexString<'a>),
224 Literal(LiteralString<'a>),
225}
226
227#[derive(Clone, Debug, PartialEq)]
229pub struct String<'a>(InnerString<'a>);
230
231impl<'a> String<'a> {
232 pub fn get(&self) -> Cow<'a, [u8]> {
234 match &self.0 {
235 InnerString::Hex(hex) => Cow::Owned(hex.get()),
236 InnerString::Literal(lit) => lit.get(),
237 }
238 }
239}
240
241impl<'a> From<HexString<'a>> for String<'a> {
242 fn from(value: HexString<'a>) -> Self {
243 Self(InnerString::Hex(value))
244 }
245}
246
247impl<'a> From<LiteralString<'a>> for String<'a> {
248 fn from(value: LiteralString<'a>) -> Self {
249 Self(InnerString::Literal(value))
250 }
251}
252
253object!(String<'a>, String);
254
255impl Skippable for String<'_> {
256 fn skip<const PLAIN: bool>(r: &mut Reader<'_>) -> Option<()> {
257 match r.peek_byte()? {
258 b'<' => HexString::skip::<true>(r),
259 b'(' => LiteralString::skip::<true>(r),
260 _ => None,
261 }
262 }
263}
264
265impl<'a> Readable<'a> for String<'a> {
266 fn read<const PLAIN: bool>(r: &mut Reader<'a>, _: &'a XRef) -> Option<Self> {
267 let inner = match r.peek_byte()? {
268 b'<' => InnerString::Hex(r.read_without_xref::<HexString>()?),
269 b'(' => InnerString::Literal(r.read_without_xref::<LiteralString>()?),
270 _ => return None,
271 };
272
273 Some(String(inner))
274 }
275}
276
/// Returns `true` if `byte` is one of the ASCII digits `0` through `7`.
fn is_octal_digit(byte: u8) -> bool {
    (b'0'..=b'7').contains(&byte)
}
280
#[cfg(test)]
mod tests {
    use crate::object::string::{HexString, LiteralString, String};
    use crate::reader::Reader;

    /// Parses `input` as a hex string and returns its decoded bytes, or `None` if parsing
    /// fails.
    fn decode_hex(input: &str) -> Option<Vec<u8>> {
        Reader::new(input.as_bytes())
            .read_without_xref::<HexString>()
            .map(|s| s.get())
    }

    /// Returns whether `input` parses as a literal string, without decoding it.
    fn literal_parses(input: &str) -> bool {
        Reader::new(input.as_bytes())
            .read_without_xref::<LiteralString>()
            .is_some()
    }

    /// Parses `input` as a literal string and returns its decoded bytes, or `None` if
    /// parsing fails.
    fn decode_literal(input: &str) -> Option<Vec<u8>> {
        Reader::new(input.as_bytes())
            .read_without_xref::<LiteralString>()
            .map(|s| s.get().to_vec())
    }

    /// Parses `input` as a string of either form and returns its decoded bytes, or `None`
    /// if parsing fails.
    fn decode_string(input: &str) -> Option<Vec<u8>> {
        Reader::new(input.as_bytes())
            .read_without_xref::<String>()
            .map(|s| s.get().to_vec())
    }

    #[test]
    fn hex_string_empty() {
        assert_eq!(decode_hex("<>"), Some(Vec::<u8>::new()));
    }

    #[test]
    fn hex_string_1() {
        assert_eq!(decode_hex("<00010203>"), Some(vec![0x00, 0x01, 0x02, 0x03]));
    }

    #[test]
    fn hex_string_2() {
        assert_eq!(
            decode_hex("<000102034>"),
            Some(vec![0x00, 0x01, 0x02, 0x03, 0x40])
        );
    }

    #[test]
    fn hex_string_trailing_1() {
        assert_eq!(
            decode_hex("<000102034>dfgfg4"),
            Some(vec![0x00, 0x01, 0x02, 0x03, 0x40])
        );
    }

    #[test]
    fn hex_string_trailing_2() {
        assert_eq!(decode_hex("<1  3 4>dfgfg4"), Some(vec![0x13, 0x40]));
    }

    #[test]
    fn hex_string_trailing_3() {
        assert_eq!(decode_hex("<1>dfgfg4"), Some(vec![0x10]));
    }

    #[test]
    fn hex_string_invalid_1() {
        assert!(decode_hex("<").is_none());
    }

    #[test]
    fn hex_string_invalid_2() {
        assert!(decode_hex("34AD").is_none());
    }

    #[test]
    fn literal_string_empty() {
        assert_eq!(decode_literal("()"), Some(b"".to_vec()));
    }

    #[test]
    fn literal_string_1() {
        assert_eq!(decode_literal("(Hi there.)"), Some(b"Hi there.".to_vec()));
    }

    #[test]
    fn literal_string_2() {
        // Only parsing is checked here; the value is deliberately not decoded.
        assert!(literal_parses("(Hi \\777)"));
    }

    #[test]
    fn literal_string_3() {
        assert_eq!(decode_literal("(Hi ) there.)"), Some(b"Hi ".to_vec()));
    }

    #[test]
    fn literal_string_4() {
        assert_eq!(
            decode_literal("(Hi (()) there)"),
            Some(b"Hi (()) there".to_vec())
        );
    }

    #[test]
    fn literal_string_5() {
        assert_eq!(decode_literal("(Hi \\()"), Some(b"Hi (".to_vec()));
    }

    #[test]
    fn literal_string_6() {
        assert_eq!(decode_literal("(Hi \\\nthere)"), Some(b"Hi there".to_vec()));
    }

    #[test]
    fn literal_string_7() {
        assert_eq!(decode_literal("(Hi \\05354)"), Some(b"Hi +54".to_vec()));
    }

    #[test]
    fn literal_string_trailing() {
        assert_eq!(
            decode_literal("(Hi there.)abcde"),
            Some(b"Hi there.".to_vec())
        );
    }

    #[test]
    fn literal_string_invalid() {
        assert!(!literal_parses("(Hi \\778)"));
    }

    #[test]
    fn string_1() {
        assert_eq!(decode_string("(Hi there.)"), Some(b"Hi there.".to_vec()));
    }

    #[test]
    fn string_2() {
        assert_eq!(
            decode_string("<00010203>"),
            Some(vec![0x00, 0x01, 0x02, 0x03])
        );
    }
}