1use rpdfium_core::error::{ParseError, PdfError};
12use rpdfium_core::{Name, PdfString};
13
14use crate::object::ObjectId;
15
/// A single lexical token produced by [`Tokenizer`] while scanning PDF syntax.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// Integer number, e.g. `42` or `-7`.
    Integer(i64),
    /// Real (floating-point) number, e.g. `3.14` or `.5`.
    Real(f64),
    /// String object, from either `(literal)` or `<hex>` syntax.
    String(PdfString),
    /// Name object, e.g. `/Type`, with `#xx` hex escapes already decoded.
    Name(Name),
    /// Boolean keyword `true` or `false`.
    Boolean(bool),
    /// The `null` keyword.
    Null,
    /// `[` — start of an array.
    ArrayStart,
    /// `]` — end of an array.
    ArrayEnd,
    /// `<<` — start of a dictionary.
    DictStart,
    /// `>>` — end of a dictionary.
    DictEnd,
    /// Indirect object reference, e.g. `5 0 R`.
    Ref(ObjectId),
    /// Any other bare keyword, e.g. `obj`, `endobj`, `stream`.
    Keyword(Vec<u8>),
    /// A `%` comment. NOTE(review): `next_token` in this file skips comments
    /// and never emits this variant; presumably kept for external producers.
    Comment(Vec<u8>),
}
46
/// Streaming lexer over a borrowed byte slice of PDF source.
pub struct Tokenizer<'a> {
    // Full input buffer being scanned; never copied.
    source: &'a [u8],
    // Byte offset of the next unread byte in `source`.
    pos: usize,
}
52
53impl<'a> Tokenizer<'a> {
54 pub fn new(source: &'a [u8]) -> Self {
56 Self { source, pos: 0 }
57 }
58
59 pub fn new_at(source: &'a [u8], pos: usize) -> Self {
61 Self { source, pos }
62 }
63
64 pub fn position(&self) -> usize {
66 self.pos
67 }
68
69 pub fn set_position(&mut self, pos: usize) {
71 self.pos = pos;
72 }
73
74 pub fn source(&self) -> &'a [u8] {
76 self.source
77 }
78
79 pub fn skip_whitespace_and_comments(&mut self) {
81 loop {
82 while self.pos < self.source.len() && is_whitespace(self.source[self.pos]) {
84 self.pos += 1;
85 }
86 if self.pos < self.source.len() && self.source[self.pos] == b'%' {
88 while self.pos < self.source.len()
89 && self.source[self.pos] != b'\r'
90 && self.source[self.pos] != b'\n'
91 {
92 self.pos += 1;
93 }
94 } else {
95 break;
96 }
97 }
98 }
99
100 pub fn next_token(&mut self) -> Option<Result<Token, PdfError>> {
102 self.skip_whitespace_and_comments();
103
104 if self.pos >= self.source.len() {
105 return None;
106 }
107
108 let offset = self.pos as u64;
109 let b = self.source[self.pos];
110
111 let result = match b {
112 b'[' => {
113 self.pos += 1;
114 Ok(Token::ArrayStart)
115 }
116 b']' => {
117 self.pos += 1;
118 Ok(Token::ArrayEnd)
119 }
120 b'<' => {
121 if self.pos + 1 < self.source.len() && self.source[self.pos + 1] == b'<' {
122 self.pos += 2;
123 Ok(Token::DictStart)
124 } else {
125 self.read_hex_string(offset)
126 }
127 }
128 b'>' => {
129 if self.pos + 1 < self.source.len() && self.source[self.pos + 1] == b'>' {
130 self.pos += 2;
131 Ok(Token::DictEnd)
132 } else {
133 Err(PdfError::Parse(ParseError::UnexpectedToken {
134 offset,
135 expected: ">>".into(),
136 found: ">".into(),
137 }))
138 }
139 }
140 b'(' => self.read_literal_string(offset),
141 b'/' => self.read_name(offset),
142 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(offset),
143 _ if b.is_ascii_alphabetic() => self.read_keyword_or_bool(offset),
144 _ => {
145 self.pos += 1;
146 Err(PdfError::Parse(ParseError::UnexpectedToken {
147 offset,
148 expected: "token".into(),
149 found: format!("byte 0x{:02x}", b),
150 }))
151 }
152 };
153
154 Some(result)
155 }
156
157 fn read_literal_string(&mut self, offset: u64) -> Result<Token, PdfError> {
159 debug_assert_eq!(self.source[self.pos], b'(');
160 self.pos += 1; let mut bytes = Vec::new();
163 let mut depth: u32 = 1;
164
165 while self.pos < self.source.len() {
166 let b = self.source[self.pos];
167 self.pos += 1;
168
169 match b {
170 b'(' => {
171 depth += 1;
172 bytes.push(b'(');
173 }
174 b')' => {
175 depth -= 1;
176 if depth == 0 {
177 return Ok(Token::String(PdfString::from_bytes(bytes)));
178 }
179 bytes.push(b')');
180 }
181 b'\\' => {
182 if self.pos >= self.source.len() {
183 return Err(PdfError::Parse(ParseError::InvalidString {
184 offset,
185 detail: "unexpected end of input in escape sequence".into(),
186 }));
187 }
188 let escaped = self.source[self.pos];
189 self.pos += 1;
190 match escaped {
191 b'n' => bytes.push(b'\n'),
192 b'r' => bytes.push(b'\r'),
193 b't' => bytes.push(b'\t'),
194 b'b' => bytes.push(0x08),
195 b'f' => bytes.push(0x0C),
196 b'(' => bytes.push(b'('),
197 b')' => bytes.push(b')'),
198 b'\\' => bytes.push(b'\\'),
199 b'\r' => {
200 if self.pos < self.source.len() && self.source[self.pos] == b'\n' {
202 self.pos += 1;
203 }
204 }
205 b'\n' => {
206 }
208 b'0'..=b'7' => {
209 let mut val: u8 = escaped - b'0';
211 for _ in 0..2 {
212 if self.pos < self.source.len() {
213 let next = self.source[self.pos];
214 if (b'0'..=b'7').contains(&next) {
215 val = val * 8 + (next - b'0');
216 self.pos += 1;
217 } else {
218 break;
219 }
220 }
221 }
222 bytes.push(val);
223 }
224 _ => {
225 bytes.push(escaped);
228 }
229 }
230 }
231 _ => bytes.push(b),
232 }
233 }
234
235 Err(PdfError::Parse(ParseError::InvalidString {
236 offset,
237 detail: "unterminated literal string".into(),
238 }))
239 }
240
241 fn read_hex_string(&mut self, offset: u64) -> Result<Token, PdfError> {
243 debug_assert_eq!(self.source[self.pos], b'<');
244 self.pos += 1; let mut hex_chars = Vec::new();
247
248 while self.pos < self.source.len() {
249 let b = self.source[self.pos];
250 self.pos += 1;
251
252 match b {
253 b'>' => {
254 if hex_chars.len() % 2 != 0 {
256 hex_chars.push(0);
257 }
258 let bytes: Vec<u8> = hex_chars
259 .chunks(2)
260 .map(|pair| pair[0] * 16 + pair[1])
261 .collect();
262 return Ok(Token::String(PdfString::from_bytes(bytes)));
263 }
264 _ if is_whitespace(b) => continue,
265 _ => {
266 let nibble =
267 hex_digit(b).ok_or(PdfError::Parse(ParseError::InvalidString {
268 offset,
269 detail: format!("invalid hex digit 0x{:02x}", b),
270 }))?;
271 hex_chars.push(nibble);
272 }
273 }
274 }
275
276 Err(PdfError::Parse(ParseError::InvalidString {
277 offset,
278 detail: "unterminated hex string".into(),
279 }))
280 }
281
282 fn read_name(&mut self, _offset: u64) -> Result<Token, PdfError> {
284 debug_assert_eq!(self.source[self.pos], b'/');
285 self.pos += 1; let mut bytes = Vec::new();
288
289 while self.pos < self.source.len() {
290 let b = self.source[self.pos];
291
292 if is_whitespace(b) || is_delimiter(b) {
293 break;
294 }
295
296 self.pos += 1;
297
298 if b == b'#' {
299 if self.pos + 1 < self.source.len() {
301 let hi = hex_digit(self.source[self.pos]);
302 let lo = hex_digit(self.source[self.pos + 1]);
303 if let (Some(h), Some(l)) = (hi, lo) {
304 bytes.push(h * 16 + l);
305 self.pos += 2;
306 continue;
307 }
308 }
309 bytes.push(b'#');
311 } else {
312 bytes.push(b);
313 }
314 }
315
316 Ok(Token::Name(Name::from_bytes(bytes)))
317 }
318
319 fn read_number(&mut self, offset: u64) -> Result<Token, PdfError> {
321 let start = self.pos;
322 let mut has_dot = false;
323
324 if self.pos < self.source.len()
326 && (self.source[self.pos] == b'+' || self.source[self.pos] == b'-')
327 {
328 self.pos += 1;
329 }
330
331 if self.pos < self.source.len() && self.source[self.pos] == b'.' {
333 has_dot = true;
334 self.pos += 1;
335 }
336
337 let digit_start = self.pos;
338
339 while self.pos < self.source.len() {
340 let b = self.source[self.pos];
341 if b == b'.' && !has_dot {
342 has_dot = true;
343 self.pos += 1;
344 } else if b.is_ascii_digit() {
345 self.pos += 1;
346 } else {
347 break;
348 }
349 }
350
351 if self.pos == digit_start && !has_dot {
353 self.pos = start;
356 return self.read_keyword_or_bool(offset);
357 }
358
359 let num_str = std::str::from_utf8(&self.source[start..self.pos])
360 .map_err(|_| PdfError::Parse(ParseError::InvalidNumber { offset }))?;
361
362 if has_dot {
363 let val: f64 = num_str
364 .parse()
365 .map_err(|_| PdfError::Parse(ParseError::InvalidNumber { offset }))?;
366 Ok(Token::Real(val))
367 } else {
368 match num_str.parse::<i64>() {
370 Ok(val) => {
371 let saved_pos = self.pos;
373 if let Some(obj_ref) = self.try_read_reference(val, offset) {
374 return Ok(Token::Ref(obj_ref));
375 }
376 self.pos = saved_pos;
377 Ok(Token::Integer(val))
378 }
379 Err(_) => {
380 let val: f64 = num_str
381 .parse()
382 .map_err(|_| PdfError::Parse(ParseError::InvalidNumber { offset }))?;
383 Ok(Token::Real(val))
384 }
385 }
386 }
387 }
388
389 fn try_read_reference(&mut self, number: i64, _offset: u64) -> Option<ObjectId> {
391 if number < 0 {
392 return None;
393 }
394
395 let saved = self.pos;
396
397 self.skip_whitespace_and_comments();
398
399 let gen_start = self.pos;
401 while self.pos < self.source.len()
402 && self.source[self.pos] >= b'0'
403 && self.source[self.pos] <= b'9'
404 {
405 self.pos += 1;
406 }
407
408 if self.pos == gen_start {
409 self.pos = saved;
410 return None;
411 }
412
413 let gen_str = std::str::from_utf8(&self.source[gen_start..self.pos]).ok()?;
414 let generation: u16 = gen_str.parse().ok()?;
415
416 self.skip_whitespace_and_comments();
417
418 if self.pos < self.source.len() && self.source[self.pos] == b'R' {
420 let after_r = self.pos + 1;
421 if after_r >= self.source.len()
422 || is_whitespace(self.source[after_r])
423 || is_delimiter(self.source[after_r])
424 {
425 self.pos = after_r;
426 return Some(ObjectId::new(number as u32, generation));
427 }
428 }
429
430 self.pos = saved;
431 None
432 }
433
434 fn read_keyword_or_bool(&mut self, _offset: u64) -> Result<Token, PdfError> {
436 let start = self.pos;
437
438 while self.pos < self.source.len() {
439 let b = self.source[self.pos];
440 if is_whitespace(b) || is_delimiter(b) {
441 break;
442 }
443 self.pos += 1;
444 }
445
446 let word = &self.source[start..self.pos];
447
448 match word {
449 b"true" => Ok(Token::Boolean(true)),
450 b"false" => Ok(Token::Boolean(false)),
451 b"null" => Ok(Token::Null),
452 _ => Ok(Token::Keyword(word.to_vec())),
453 }
454 }
455}
456
/// Returns true for the six whitespace bytes of PDF syntax:
/// NUL, horizontal tab, line feed, form feed, carriage return, and space.
#[inline]
pub fn is_whitespace(b: u8) -> bool {
    const PDF_WHITESPACE: [u8; 6] = [b'\0', b'\t', b'\n', 0x0C, b'\r', b' '];
    PDF_WHITESPACE.contains(&b)
}
462
/// Returns true for the delimiter bytes of PDF syntax:
/// `( ) < > [ ] { } /` and `%`.
#[inline]
pub fn is_delimiter(b: u8) -> bool {
    b"()<>[]{}/%".contains(&b)
}
471
/// Decodes one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its numeric value
/// in `0..=15`; any other byte yields `None`.
#[inline]
fn hex_digit(b: u8) -> Option<u8> {
    char::from(b).to_digit(16).map(|d| d as u8)
}
482
#[cfg(test)]
mod tests {
    use super::*;

    // Tokenizes `source` to completion, panicking on any error result.
    fn tokenize_all(source: &[u8]) -> Vec<Token> {
        let mut tok = Tokenizer::new(source);
        let mut tokens = Vec::new();
        while let Some(result) = tok.next_token() {
            tokens.push(result.unwrap());
        }
        tokens
    }

    // ---- numbers ----

    #[test]
    fn integer_token() {
        let tokens = tokenize_all(b"42");
        assert_eq!(tokens, vec![Token::Integer(42)]);
    }

    #[test]
    fn negative_integer() {
        let tokens = tokenize_all(b"-7");
        assert_eq!(tokens, vec![Token::Integer(-7)]);
    }

    #[test]
    fn positive_integer() {
        let tokens = tokenize_all(b"+3");
        assert_eq!(tokens, vec![Token::Integer(3)]);
    }

    #[test]
    #[allow(clippy::approx_constant)]
    fn real_number() {
        let tokens = tokenize_all(b"3.14");
        assert_eq!(tokens, vec![Token::Real(3.14)]);
    }

    #[test]
    fn real_leading_dot() {
        let tokens = tokenize_all(b".5");
        assert_eq!(tokens, vec![Token::Real(0.5)]);
    }

    #[test]
    fn real_negative() {
        let tokens = tokenize_all(b"-1.5");
        assert_eq!(tokens, vec![Token::Real(-1.5)]);
    }

    // ---- keywords and reserved words ----

    #[test]
    fn boolean_true() {
        let tokens = tokenize_all(b"true");
        assert_eq!(tokens, vec![Token::Boolean(true)]);
    }

    #[test]
    fn boolean_false() {
        let tokens = tokenize_all(b"false");
        assert_eq!(tokens, vec![Token::Boolean(false)]);
    }

    #[test]
    fn null_token() {
        let tokens = tokenize_all(b"null");
        assert_eq!(tokens, vec![Token::Null]);
    }

    // ---- names ----

    #[test]
    fn name_simple() {
        let tokens = tokenize_all(b"/Type");
        assert_eq!(
            tokens,
            vec![Token::Name(Name::from_bytes(b"Type".to_vec()))]
        );
    }

    #[test]
    fn name_with_hex_escape() {
        // #20 decodes to a literal space inside the name.
        let tokens = tokenize_all(b"/Name#20With#20Spaces");
        assert_eq!(
            tokens,
            vec![Token::Name(Name::from_bytes(b"Name With Spaces".to_vec()))]
        );
    }

    #[test]
    fn name_empty() {
        // A bare `/` followed by whitespace is a valid, empty name.
        let tokens = tokenize_all(b"/ ");
        assert_eq!(tokens, vec![Token::Name(Name::from_bytes(Vec::new()))]);
    }

    // ---- literal strings ----

    #[test]
    fn literal_string_simple() {
        let tokens = tokenize_all(b"(Hello World)");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(
                b"Hello World".to_vec()
            ))]
        );
    }

    #[test]
    fn literal_string_nested_parens() {
        // Balanced unescaped parens are kept literally.
        let tokens = tokenize_all(b"(A (B) C)");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"A (B) C".to_vec()))]
        );
    }

    #[test]
    fn literal_string_escape_sequences() {
        let tokens = tokenize_all(b"(\\n\\r\\t\\\\)");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"\n\r\t\\".to_vec()))]
        );
    }

    #[test]
    fn literal_string_octal_escape() {
        // Octal \101 = 0x41 = 'A'.
        let tokens = tokenize_all(b"(\\101)");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"A".to_vec()))]
        );
    }

    // ---- hex strings ----

    #[test]
    fn hex_string() {
        let tokens = tokenize_all(b"<48656C6C6F>");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"Hello".to_vec()))]
        );
    }

    #[test]
    fn hex_string_with_whitespace() {
        let tokens = tokenize_all(b"<48 65 6C 6C 6F>");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"Hello".to_vec()))]
        );
    }

    #[test]
    fn hex_string_odd_digits() {
        // Odd digit count: trailing digit padded with a 0 low nibble.
        let tokens = tokenize_all(b"<ABC>");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(vec![0xAB, 0xC0]))]
        );
    }

    // ---- arrays, dictionaries, references ----

    #[test]
    fn array_delimiters() {
        let tokens = tokenize_all(b"[1 2]");
        assert_eq!(
            tokens,
            vec![
                Token::ArrayStart,
                Token::Integer(1),
                Token::Integer(2),
                Token::ArrayEnd,
            ]
        );
    }

    #[test]
    fn dict_delimiters() {
        let tokens = tokenize_all(b"<< /Type /Catalog >>");
        assert_eq!(
            tokens,
            vec![
                Token::DictStart,
                Token::Name(Name::from_bytes(b"Type".to_vec())),
                Token::Name(Name::from_bytes(b"Catalog".to_vec())),
                Token::DictEnd,
            ]
        );
    }

    #[test]
    fn indirect_reference() {
        let tokens = tokenize_all(b"5 0 R");
        assert_eq!(tokens, vec![Token::Ref(ObjectId::new(5, 0))]);
    }

    #[test]
    fn indirect_reference_in_array() {
        let tokens = tokenize_all(b"[5 0 R]");
        assert_eq!(
            tokens,
            vec![
                Token::ArrayStart,
                Token::Ref(ObjectId::new(5, 0)),
                Token::ArrayEnd,
            ]
        );
    }

    #[test]
    fn keyword_obj() {
        let tokens = tokenize_all(b"obj");
        assert_eq!(tokens, vec![Token::Keyword(b"obj".to_vec())]);
    }

    #[test]
    fn keyword_stream() {
        let tokens = tokenize_all(b"stream");
        assert_eq!(tokens, vec![Token::Keyword(b"stream".to_vec())]);
    }

    // ---- comments and whitespace ----

    #[test]
    fn skip_comments() {
        let tokens = tokenize_all(b"% this is a comment\n42");
        assert_eq!(tokens, vec![Token::Integer(42)]);
    }

    #[test]
    fn mixed_tokens() {
        let source = b"1 0 obj\n<< /Type /Page /MediaBox [0 0 612 792] >>\nendobj";
        let tokens = tokenize_all(source);
        // `1 0 obj` is NOT a reference (`obj` follows, not `R`).
        assert_eq!(tokens[0], Token::Integer(1));
        assert_eq!(tokens[1], Token::Integer(0));
        assert_eq!(tokens[2], Token::Keyword(b"obj".to_vec()));
        assert_eq!(tokens[3], Token::DictStart);
    }

    #[test]
    fn position_tracking() {
        let mut tok = Tokenizer::new(b"  42");
        assert_eq!(tok.position(), 0);
        let _ = tok.next_token();
        assert_eq!(tok.position(), 4);
    }

    #[test]
    fn empty_input() {
        let tokens = tokenize_all(b"");
        assert!(tokens.is_empty());
    }

    #[test]
    fn whitespace_only() {
        let tokens = tokenize_all(b" \n\r\t ");
        assert!(tokens.is_empty());
    }

    #[test]
    fn literal_string_line_continuation() {
        // `\<LF>` drops the newline entirely.
        let tokens = tokenize_all(b"(line1\\\nline2)");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"line1line2".to_vec()))]
        );
    }

    // ---- hex string edge cases ----

    #[test]
    fn hex_string_empty() {
        let tokens = tokenize_all(b"<>");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(Vec::new()))]
        );
    }

    #[test]
    fn hex_string_stops_at_closing_angle() {
        let tokens = tokenize_all(b"<1A2b>abcd");
        assert_eq!(
            tokens,
            vec![
                Token::String(PdfString::from_bytes(vec![0x1A, 0x2B])),
                Token::Keyword(b"abcd".to_vec()),
            ]
        );
    }

    #[test]
    fn hex_string_uneven_single_digit() {
        // Single digit becomes the high nibble, low nibble padded to 0.
        let tokens = tokenize_all(b"<A>");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(vec![0xA0]))]
        );
    }

    #[test]
    fn hex_string_unterminated_is_error() {
        let mut tok = Tokenizer::new(b"<1A2b");
        let result = tok.next_token();
        assert!(result.is_some());
        assert!(result.unwrap().is_err());
    }

    #[test]
    fn hex_string_invalid_hex_digit() {
        let mut tok = Tokenizer::new(b"<zz>");
        let result = tok.next_token();
        assert!(result.is_some());
        assert!(result.unwrap().is_err());
    }

    #[test]
    fn hex_string_just_closing_angle() {
        let tokens = tokenize_all(b"<>");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(Vec::new()))]
        );
    }

    #[test]
    fn hex_string_whitespace_varieties() {
        // Space, tab, LF, and CR between digits are all ignored.
        let tokens = tokenize_all(b"<4 8\t6\n5\r6C>");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"Hel".to_vec()))]
        );
    }

    #[test]
    fn hex_string_lowercase() {
        let tokens = tokenize_all(b"<48656c6c6f>");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"Hello".to_vec()))]
        );
    }

    #[test]
    fn invalid_reference_max_u32() {
        // 4294967295 == u32::MAX, still representable; just assert the
        // input tokenizes rather than pinning the exact token shape.
        let tokens = tokenize_all(b"4294967295 0 R");
        assert!(!tokens.is_empty());
    }

    #[test]
    fn multiple_words_whitespace_separated() {
        let tokens = tokenize_all(b"  foo  bar  ");
        assert_eq!(
            tokens,
            vec![
                Token::Keyword(b"foo".to_vec()),
                Token::Keyword(b"bar".to_vec()),
            ]
        );
    }

    #[test]
    fn words_around_delimiters() {
        let tokens = tokenize_all(b"/Name[42]");
        assert_eq!(
            tokens,
            vec![
                Token::Name(Name::from_bytes(b"Name".to_vec())),
                Token::ArrayStart,
                Token::Integer(42),
                Token::ArrayEnd,
            ]
        );
    }

    #[test]
    fn comment_then_token() {
        let tokens = tokenize_all(b"% comment line\r\n/Next");
        assert_eq!(
            tokens,
            vec![Token::Name(Name::from_bytes(b"Next".to_vec()))]
        );
    }

    #[test]
    fn multiple_comments() {
        let tokens = tokenize_all(b"% first\n% second\n42");
        assert_eq!(tokens, vec![Token::Integer(42)]);
    }

    // ---- literal string edge cases ----

    #[test]
    fn literal_string_deeply_nested_parens() {
        let tokens = tokenize_all(b"(a(b(c)d)e)");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"a(b(c)d)e".to_vec()))]
        );
    }

    #[test]
    fn literal_string_escape_backspace_formfeed() {
        let tokens = tokenize_all(b"(\\b\\f)");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(vec![0x08, 0x0C]))]
        );
    }

    #[test]
    fn literal_string_escaped_parens() {
        let tokens = tokenize_all(b"(\\(\\))");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"()".to_vec()))]
        );
    }

    #[test]
    fn literal_string_unterminated() {
        let mut tok = Tokenizer::new(b"(no closing paren");
        let result = tok.next_token();
        assert!(result.is_some());
        assert!(result.unwrap().is_err());
    }

    #[test]
    fn literal_string_octal_range() {
        // \000 and \377 cover the full byte range 0x00..=0xFF.
        let tokens = tokenize_all(b"(\\000\\377)");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(vec![0x00, 0xFF]))]
        );
    }

    #[test]
    fn literal_string_cr_lf_continuation() {
        // `\<CRLF>` drops the whole two-byte EOL marker.
        let tokens = tokenize_all(b"(line1\\\r\nline2)");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"line1line2".to_vec()))]
        );
    }

    #[test]
    fn literal_string_unknown_escape() {
        // Unknown escape keeps the byte and drops the backslash.
        let tokens = tokenize_all(b"(\\z)");
        assert_eq!(
            tokens,
            vec![Token::String(PdfString::from_bytes(b"z".to_vec()))]
        );
    }

    // ---- name edge cases ----

    #[test]
    fn name_all_hex_encoded() {
        // #54#79#70#65 decodes to "Type".
        let tokens = tokenize_all(b"/#54#79#70#65");
        assert_eq!(
            tokens,
            vec![Token::Name(Name::from_bytes(b"Type".to_vec()))]
        );
    }

    #[test]
    fn name_at_eof() {
        let tokens = tokenize_all(b"/EOF");
        assert_eq!(tokens, vec![Token::Name(Name::from_bytes(b"EOF".to_vec()))]);
    }

    // ---- numeric overflow and position handling ----

    #[test]
    fn large_integer_becomes_real() {
        // Too large for i64; the tokenizer degrades it to a Real.
        let tokens = tokenize_all(b"99999999999999999999");
        assert_eq!(tokens.len(), 1);
        match &tokens[0] {
            Token::Real(_) => {}
            other => panic!("expected Real, got {:?}", other),
        }
    }

    #[test]
    fn zero_integer() {
        let tokens = tokenize_all(b"0");
        assert_eq!(tokens, vec![Token::Integer(0)]);
    }

    #[test]
    fn negative_zero() {
        let tokens = tokenize_all(b"-0");
        assert_eq!(tokens, vec![Token::Integer(0)]);
    }

    #[test]
    fn position_tracks_through_tokens() {
        let mut tok = Tokenizer::new(b"true false null");
        assert_eq!(tok.position(), 0);
        let _ = tok.next_token();
        assert_eq!(tok.position(), 4);
        let _ = tok.next_token();
        assert_eq!(tok.position(), 10);
        let _ = tok.next_token();
        assert_eq!(tok.position(), 15);
        assert!(tok.next_token().is_none());
    }

    #[test]
    fn set_position_reparse() {
        let source = b"42 true";
        let mut tok = Tokenizer::new(source);
        let _ = tok.next_token();
        // Seek back to the start of `true` and re-tokenize from there.
        tok.set_position(3);
        let t = tok.next_token().unwrap().unwrap();
        assert_eq!(t, Token::Boolean(true));
    }

    #[test]
    fn lone_greater_than_is_error() {
        let mut tok = Tokenizer::new(b">");
        let result = tok.next_token();
        assert!(result.is_some());
        assert!(result.unwrap().is_err());
    }

    #[test]
    fn nul_byte_is_whitespace() {
        let tokens = tokenize_all(&[0x00, b'4', b'2']);
        assert_eq!(tokens, vec![Token::Integer(42)]);
    }
}
1059}