1use super::{ParseError, ParseResult};
57use std::collections::HashMap;
58
/// One decoded operation from a PDF content stream.
///
/// Each variant is produced by `ContentParser` for the operator keyword named
/// in its documentation. Payload fields hold the operator's operands in the
/// order they appear in the stream (the parser pops them in reverse and puts
/// them back in written order).
#[derive(Debug, Clone, PartialEq)]
pub enum ContentOperation {
    /// Emitted for `BT`; takes no operands.
    BeginText,

    /// Emitted for `ET`; takes no operands.
    EndText,

    /// Emitted for `Tc`; carries its single numeric operand.
    SetCharSpacing(f32),

    /// Emitted for `Tw`; carries its single numeric operand.
    SetWordSpacing(f32),

    /// Emitted for `Tz`; carries its single numeric operand.
    SetHorizontalScaling(f32),

    /// Emitted for `TL`; carries its single numeric operand.
    SetLeading(f32),

    /// Emitted for `Tf`; carries the font name operand and the size operand.
    SetFont(String, f32),

    /// Emitted for `Tr`; the operand must be an integer token.
    SetTextRenderMode(i32),

    /// Emitted for `Ts`; carries its single numeric operand.
    SetTextRise(f32),

    /// Emitted for `Td`; carries the two numeric operands `(tx, ty)`.
    MoveText(f32, f32),

    /// Emitted for `TD`; carries the two numeric operands `(tx, ty)`.
    MoveTextSetLeading(f32, f32),

    /// Emitted for `Tm`; carries the six numeric operands `(a, b, c, d, e, f)`.
    SetTextMatrix(f32, f32, f32, f32, f32, f32),

    /// Emitted for `T*`; takes no operands.
    NextLine,

    /// Emitted for `Tj`; carries the bytes of a literal or hex string operand.
    ShowText(Vec<u8>),

    /// Emitted for `TJ`; carries the array operand converted to text/spacing
    /// elements.
    ShowTextArray(Vec<TextElement>),

    /// Emitted for `'`; carries the bytes of the string operand.
    NextLineShowText(Vec<u8>),

    /// Emitted for `"`; carries the two numeric operands in stream order
    /// followed by the bytes of the string operand.
    SetSpacingNextLineShowText(f32, f32, Vec<u8>),

    /// Emitted for `q`; takes no operands.
    SaveGraphicsState,

    /// Emitted for `Q`; takes no operands.
    RestoreGraphicsState,

    /// Emitted for `cm`; carries the six numeric operands `(a, b, c, d, e, f)`.
    SetTransformMatrix(f32, f32, f32, f32, f32, f32),

    /// Emitted for `w`; carries its single numeric operand.
    SetLineWidth(f32),

    /// Emitted for `J`; the operand must be an integer token.
    SetLineCap(i32),

    /// Emitted for `j`; the operand must be an integer token.
    SetLineJoin(i32),

    /// Emitted for `M`; carries its single numeric operand.
    SetMiterLimit(f32),

    /// Emitted for `d`; carries the numeric array operand and the phase operand.
    SetDashPattern(Vec<f32>, f32),

    /// Emitted for `ri`; carries the name operand.
    SetIntent(String),

    /// Emitted for `i`; carries its single numeric operand.
    SetFlatness(f32),

    /// Emitted for `gs`; carries the name operand.
    SetGraphicsStateParams(String),

    /// Emitted for `m`; carries `(x, y)`.
    MoveTo(f32, f32),

    /// Emitted for `l`; carries `(x, y)`.
    LineTo(f32, f32),

    /// Emitted for `c`; carries `(x1, y1, x2, y2, x3, y3)`.
    CurveTo(f32, f32, f32, f32, f32, f32),

    /// Emitted for `v`; carries `(x2, y2, x3, y3)`.
    CurveToV(f32, f32, f32, f32),

    /// Emitted for `y`; carries `(x1, y1, x3, y3)`.
    CurveToY(f32, f32, f32, f32),

    /// Emitted for `h`; takes no operands.
    ClosePath,

    /// Emitted for `re`; carries `(x, y, width, height)`.
    Rectangle(f32, f32, f32, f32),

    /// Emitted for `S`; takes no operands.
    Stroke,

    /// Emitted for `s`; takes no operands.
    CloseStroke,

    /// Emitted for both `f` and `F`; takes no operands.
    Fill,

    /// Emitted for `f*`; takes no operands.
    FillEvenOdd,

    /// Emitted for `B`; takes no operands.
    FillStroke,

    /// Emitted for `B*`; takes no operands.
    FillStrokeEvenOdd,

    /// Emitted for `b`; takes no operands.
    CloseFillStroke,

    /// Emitted for `b*`; takes no operands.
    CloseFillStrokeEvenOdd,

    /// Emitted for `n`; takes no operands.
    EndPath,

    /// Emitted for `W`; takes no operands.
    Clip,

    /// Emitted for `W*`; takes no operands.
    ClipEvenOdd,

    /// Emitted for `CS`; carries the name operand.
    SetStrokingColorSpace(String),

    /// Emitted for `cs`; carries the name operand.
    SetNonStrokingColorSpace(String),

    /// Emitted for `SC` and `SCN`; carries the trailing run of numeric operands.
    SetStrokingColor(Vec<f32>),

    /// Emitted for `sc` and `scn`; carries the trailing run of numeric operands.
    SetNonStrokingColor(Vec<f32>),

    /// Emitted for `G`; carries its single numeric operand.
    SetStrokingGray(f32),

    /// Emitted for `g`; carries its single numeric operand.
    SetNonStrokingGray(f32),

    /// Emitted for `RG`; carries `(r, g, b)`.
    SetStrokingRGB(f32, f32, f32),

    /// Emitted for `rg`; carries `(r, g, b)`.
    SetNonStrokingRGB(f32, f32, f32),

    /// Emitted for `K`; carries `(c, m, y, k)`.
    SetStrokingCMYK(f32, f32, f32, f32),

    /// Emitted for `k`; carries `(c, m, y, k)`.
    SetNonStrokingCMYK(f32, f32, f32, f32),

    /// Emitted for `sh`; carries the name operand.
    ShadingFill(String),

    /// Emitted for `BI`; the parser skips the following tokens up to and
    /// including the `EI` operator and emits only this marker.
    BeginInlineImage,

    /// Raw inline image bytes.
    ///
    /// NOTE(review): no parser path visible in this file constructs this
    /// variant — confirm whether other code produces it.
    InlineImageData(Vec<u8>),

    /// Emitted for `Do`; carries the name operand.
    PaintXObject(String),

    /// Emitted for `BMC`; carries the tag name operand.
    BeginMarkedContent(String),

    /// Emitted for `BDC`; carries the tag name and a property map. When the
    /// properties operand is a name it is stored under the key
    /// `__resource_ref`.
    BeginMarkedContentWithProps(String, HashMap<String, String>),

    /// Emitted for `EMC`; takes no operands.
    EndMarkedContent,

    /// Emitted for `MP`; carries the tag name operand.
    DefineMarkedContentPoint(String),

    /// Emitted for `DP`; carries the tag name and a property map. When the
    /// properties operand is a name it is stored under the key
    /// `__resource_ref`.
    DefineMarkedContentPointWithProps(String, HashMap<String, String>),

    /// Emitted for `BX`; takes no operands.
    BeginCompatibility,

    /// Emitted for `EX`; takes no operands.
    EndCompatibility,
}
331
/// One element of the array operand of the `TJ` operator.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// Bytes of a literal or hex string element.
    Text(Vec<u8>),
    /// A numeric element (integer tokens are converted to `f32`).
    Spacing(f32),
}
358
/// Lexical token produced by `ContentTokenizer`.
#[derive(Debug, Clone, PartialEq)]
pub(super) enum Token {
    /// Numeric literal that contained a `.`; parsed as `f32`.
    Number(f32),
    /// Numeric literal without a `.`; parsed as `i32`.
    Integer(i32),
    /// Contents of a `( ... )` literal string with escapes already decoded.
    String(Vec<u8>),
    /// Decoded bytes of a `< ... >` hex string.
    HexString(Vec<u8>),
    /// Name without its leading `/`, with `#xx` escapes already decoded.
    Name(String),
    /// Any other run of regular characters; treated as an operator keyword.
    Operator(String),
    /// `[`
    ArrayStart,
    /// `]`
    ArrayEnd,
    /// `<<`
    DictStart,
    /// `>>`
    DictEnd,
}
373
/// Splits the raw bytes of a content stream into `Token`s, one call at a time.
pub struct ContentTokenizer<'a> {
    /// Bytes being tokenized; borrowed for the tokenizer's lifetime.
    input: &'a [u8],
    /// Index into `input` of the next unread byte.
    position: usize,
}
379
380impl<'a> ContentTokenizer<'a> {
381 pub fn new(input: &'a [u8]) -> Self {
383 Self { input, position: 0 }
384 }
385
386 pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
388 self.skip_whitespace();
389
390 if self.position >= self.input.len() {
391 return Ok(None);
392 }
393
394 let ch = self.input[self.position];
395
396 match ch {
397 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
399
400 b'(' => self.read_literal_string(),
402 b'<' => {
403 if self.peek_next() == Some(b'<') {
404 self.position += 2;
405 Ok(Some(Token::DictStart))
406 } else {
407 self.read_hex_string()
408 }
409 }
410 b'>' => {
411 if self.peek_next() == Some(b'>') {
412 self.position += 2;
413 Ok(Some(Token::DictEnd))
414 } else {
415 Err(ParseError::SyntaxError {
416 position: self.position,
417 message: "Unexpected '>'".to_string(),
418 })
419 }
420 }
421
422 b'[' => {
424 self.position += 1;
425 Ok(Some(Token::ArrayStart))
426 }
427 b']' => {
428 self.position += 1;
429 Ok(Some(Token::ArrayEnd))
430 }
431
432 b'/' => self.read_name(),
434
435 _ => self.read_operator(),
437 }
438 }
439
440 fn skip_whitespace(&mut self) {
441 while self.position < self.input.len() {
442 match self.input[self.position] {
443 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
444 b'%' => self.skip_comment(),
445 _ => break,
446 }
447 }
448 }
449
450 fn skip_comment(&mut self) {
451 while self.position < self.input.len() && self.input[self.position] != b'\n' {
452 self.position += 1;
453 }
454 }
455
456 fn peek_next(&self) -> Option<u8> {
457 if self.position + 1 < self.input.len() {
458 Some(self.input[self.position + 1])
459 } else {
460 None
461 }
462 }
463
464 fn read_number(&mut self) -> ParseResult<Option<Token>> {
465 let start = self.position;
466 let mut has_dot = false;
467
468 if self.position < self.input.len()
470 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
471 {
472 self.position += 1;
473 }
474
475 while self.position < self.input.len() {
477 match self.input[self.position] {
478 b'0'..=b'9' => self.position += 1,
479 b'.' if !has_dot => {
480 has_dot = true;
481 self.position += 1;
482 }
483 _ => break,
484 }
485 }
486
487 let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
488 ParseError::SyntaxError {
489 position: start,
490 message: "Invalid number format".to_string(),
491 }
492 })?;
493
494 if has_dot {
495 let value = num_str
496 .parse::<f32>()
497 .map_err(|_| ParseError::SyntaxError {
498 position: start,
499 message: "Invalid float number".to_string(),
500 })?;
501 Ok(Some(Token::Number(value)))
502 } else {
503 let value = num_str
504 .parse::<i32>()
505 .map_err(|_| ParseError::SyntaxError {
506 position: start,
507 message: "Invalid integer number".to_string(),
508 })?;
509 Ok(Some(Token::Integer(value)))
510 }
511 }
512
513 fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
514 self.position += 1; let mut result = Vec::new();
516 let mut paren_depth = 1;
517 let mut escape = false;
518
519 while self.position < self.input.len() && paren_depth > 0 {
520 let ch = self.input[self.position];
521 self.position += 1;
522
523 if escape {
524 match ch {
525 b'n' => result.push(b'\n'),
526 b'r' => result.push(b'\r'),
527 b't' => result.push(b'\t'),
528 b'b' => result.push(b'\x08'),
529 b'f' => result.push(b'\x0C'),
530 b'(' => result.push(b'('),
531 b')' => result.push(b')'),
532 b'\\' => result.push(b'\\'),
533 b'0'..=b'7' => {
534 self.position -= 1;
536 let octal_value = self.read_octal_escape()?;
537 result.push(octal_value);
538 }
539 _ => result.push(ch), }
541 escape = false;
542 } else {
543 match ch {
544 b'\\' => escape = true,
545 b'(' => {
546 paren_depth += 1;
547 result.push(ch);
548 }
549 b')' => {
550 paren_depth -= 1;
551 if paren_depth > 0 {
552 result.push(ch);
553 }
554 }
555 _ => result.push(ch),
556 }
557 }
558 }
559
560 Ok(Some(Token::String(result)))
561 }
562
563 fn read_octal_escape(&mut self) -> ParseResult<u8> {
564 let mut value = 0u8;
565 let mut count = 0;
566
567 while count < 3 && self.position < self.input.len() {
568 match self.input[self.position] {
569 b'0'..=b'7' => {
570 value = value * 8 + (self.input[self.position] - b'0');
571 self.position += 1;
572 count += 1;
573 }
574 _ => break,
575 }
576 }
577
578 Ok(value)
579 }
580
581 fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
582 self.position += 1; let mut result = Vec::new();
584 let mut nibble = None;
585
586 while self.position < self.input.len() {
587 let ch = self.input[self.position];
588
589 match ch {
590 b'>' => {
591 self.position += 1;
592 if let Some(n) = nibble {
594 result.push(n << 4);
595 }
596 return Ok(Some(Token::HexString(result)));
597 }
598 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
599 let digit = if ch <= b'9' {
600 ch - b'0'
601 } else if ch <= b'F' {
602 ch - b'A' + 10
603 } else {
604 ch - b'a' + 10
605 };
606
607 if let Some(n) = nibble {
608 result.push((n << 4) | digit);
609 nibble = None;
610 } else {
611 nibble = Some(digit);
612 }
613 self.position += 1;
614 }
615 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
616 self.position += 1;
618 }
619 _ => {
620 return Err(ParseError::SyntaxError {
621 position: self.position,
622 message: format!("Invalid character in hex string: {:?}", ch as char),
623 });
624 }
625 }
626 }
627
628 Err(ParseError::SyntaxError {
629 position: self.position,
630 message: "Unterminated hex string".to_string(),
631 })
632 }
633
634 fn read_name(&mut self) -> ParseResult<Option<Token>> {
635 self.position += 1; let start = self.position;
637
638 while self.position < self.input.len() {
639 let ch = self.input[self.position];
640 match ch {
641 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
642 | b']' | b'{' | b'}' | b'/' | b'%' => break,
643 b'#' => {
644 self.position += 1;
646 if self.position + 1 < self.input.len() {
647 self.position += 2;
648 }
649 }
650 _ => self.position += 1,
651 }
652 }
653
654 let name_bytes = &self.input[start..self.position];
655 let name = self.decode_name(name_bytes)?;
656 Ok(Some(Token::Name(name)))
657 }
658
659 fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
660 let mut result = Vec::new();
661 let mut i = 0;
662
663 while i < bytes.len() {
664 if bytes[i] == b'#' && i + 2 < bytes.len() {
665 let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
667 ParseError::SyntaxError {
668 position: self.position,
669 message: "Invalid hex escape in name".to_string(),
670 }
671 })?;
672 let value =
673 u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
674 position: self.position,
675 message: "Invalid hex escape in name".to_string(),
676 })?;
677 result.push(value);
678 i += 3;
679 } else {
680 result.push(bytes[i]);
681 i += 1;
682 }
683 }
684
685 String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
686 position: self.position,
687 message: "Invalid UTF-8 in name".to_string(),
688 })
689 }
690
691 fn read_operator(&mut self) -> ParseResult<Option<Token>> {
692 let start = self.position;
693
694 while self.position < self.input.len() {
695 let ch = self.input[self.position];
696 match ch {
697 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
698 | b']' | b'{' | b'}' | b'/' | b'%' => break,
699 _ => self.position += 1,
700 }
701 }
702
703 let op_bytes = &self.input[start..self.position];
704 let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
705 position: start,
706 message: "Invalid operator".to_string(),
707 })?;
708
709 Ok(Some(Token::Operator(op.to_string())))
710 }
711}
712
/// Turns the token sequence of a content stream into `ContentOperation`s.
pub struct ContentParser {
    /// Tokens of the content stream being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to consume.
    position: usize,
}
735
736impl ContentParser {
737 pub fn new(_content: &[u8]) -> Self {
739 Self {
740 tokens: Vec::new(),
741 position: 0,
742 }
743 }
744
745 pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
779 Self::parse_content(content)
780 }
781
782 pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
787 let mut tokenizer = ContentTokenizer::new(content);
788 let mut tokens = Vec::new();
789
790 while let Some(token) = tokenizer.next_token()? {
792 tokens.push(token);
793 }
794
795 let mut parser = Self {
796 tokens,
797 position: 0,
798 };
799
800 parser.parse_operators()
801 }
802
803 fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
804 let mut operators = Vec::new();
805 let mut operand_stack: Vec<Token> = Vec::new();
806
807 while self.position < self.tokens.len() {
808 let token = self.tokens[self.position].clone();
809 self.position += 1;
810
811 match &token {
812 Token::Operator(op) => {
813 let operator = self.parse_operator(op, &mut operand_stack)?;
814 operators.push(operator);
815 }
816 _ => {
817 operand_stack.push(token);
819 }
820 }
821 }
822
823 Ok(operators)
824 }
825
826 fn parse_operator(
827 &mut self,
828 op: &str,
829 operands: &mut Vec<Token>,
830 ) -> ParseResult<ContentOperation> {
831 let operator = match op {
832 "BT" => ContentOperation::BeginText,
834 "ET" => ContentOperation::EndText,
835
836 "Tc" => {
838 let spacing = self.pop_number(operands)?;
839 ContentOperation::SetCharSpacing(spacing)
840 }
841 "Tw" => {
842 let spacing = self.pop_number(operands)?;
843 ContentOperation::SetWordSpacing(spacing)
844 }
845 "Tz" => {
846 let scale = self.pop_number(operands)?;
847 ContentOperation::SetHorizontalScaling(scale)
848 }
849 "TL" => {
850 let leading = self.pop_number(operands)?;
851 ContentOperation::SetLeading(leading)
852 }
853 "Tf" => {
854 let size = self.pop_number(operands)?;
855 let font = self.pop_name(operands)?;
856 ContentOperation::SetFont(font, size)
857 }
858 "Tr" => {
859 let mode = self.pop_integer(operands)?;
860 ContentOperation::SetTextRenderMode(mode)
861 }
862 "Ts" => {
863 let rise = self.pop_number(operands)?;
864 ContentOperation::SetTextRise(rise)
865 }
866
867 "Td" => {
869 let ty = self.pop_number(operands)?;
870 let tx = self.pop_number(operands)?;
871 ContentOperation::MoveText(tx, ty)
872 }
873 "TD" => {
874 let ty = self.pop_number(operands)?;
875 let tx = self.pop_number(operands)?;
876 ContentOperation::MoveTextSetLeading(tx, ty)
877 }
878 "Tm" => {
879 let f = self.pop_number(operands)?;
880 let e = self.pop_number(operands)?;
881 let d = self.pop_number(operands)?;
882 let c = self.pop_number(operands)?;
883 let b = self.pop_number(operands)?;
884 let a = self.pop_number(operands)?;
885 ContentOperation::SetTextMatrix(a, b, c, d, e, f)
886 }
887 "T*" => ContentOperation::NextLine,
888
889 "Tj" => {
891 let text = self.pop_string(operands)?;
892 ContentOperation::ShowText(text)
893 }
894 "TJ" => {
895 let array = self.pop_array(operands)?;
896 let elements = self.parse_text_array(array)?;
897 ContentOperation::ShowTextArray(elements)
898 }
899 "'" => {
900 let text = self.pop_string(operands)?;
901 ContentOperation::NextLineShowText(text)
902 }
903 "\"" => {
904 let text = self.pop_string(operands)?;
905 let aw = self.pop_number(operands)?;
906 let ac = self.pop_number(operands)?;
907 ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
908 }
909
910 "q" => ContentOperation::SaveGraphicsState,
912 "Q" => ContentOperation::RestoreGraphicsState,
913 "cm" => {
914 let f = self.pop_number(operands)?;
915 let e = self.pop_number(operands)?;
916 let d = self.pop_number(operands)?;
917 let c = self.pop_number(operands)?;
918 let b = self.pop_number(operands)?;
919 let a = self.pop_number(operands)?;
920 ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
921 }
922 "w" => {
923 let width = self.pop_number(operands)?;
924 ContentOperation::SetLineWidth(width)
925 }
926 "J" => {
927 let cap = self.pop_integer(operands)?;
928 ContentOperation::SetLineCap(cap)
929 }
930 "j" => {
931 let join = self.pop_integer(operands)?;
932 ContentOperation::SetLineJoin(join)
933 }
934 "M" => {
935 let limit = self.pop_number(operands)?;
936 ContentOperation::SetMiterLimit(limit)
937 }
938 "d" => {
939 let phase = self.pop_number(operands)?;
940 let array = self.pop_array(operands)?;
941 let pattern = self.parse_dash_array(array)?;
942 ContentOperation::SetDashPattern(pattern, phase)
943 }
944 "ri" => {
945 let intent = self.pop_name(operands)?;
946 ContentOperation::SetIntent(intent)
947 }
948 "i" => {
949 let flatness = self.pop_number(operands)?;
950 ContentOperation::SetFlatness(flatness)
951 }
952 "gs" => {
953 let name = self.pop_name(operands)?;
954 ContentOperation::SetGraphicsStateParams(name)
955 }
956
957 "m" => {
959 let y = self.pop_number(operands)?;
960 let x = self.pop_number(operands)?;
961 ContentOperation::MoveTo(x, y)
962 }
963 "l" => {
964 let y = self.pop_number(operands)?;
965 let x = self.pop_number(operands)?;
966 ContentOperation::LineTo(x, y)
967 }
968 "c" => {
969 let y3 = self.pop_number(operands)?;
970 let x3 = self.pop_number(operands)?;
971 let y2 = self.pop_number(operands)?;
972 let x2 = self.pop_number(operands)?;
973 let y1 = self.pop_number(operands)?;
974 let x1 = self.pop_number(operands)?;
975 ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
976 }
977 "v" => {
978 let y3 = self.pop_number(operands)?;
979 let x3 = self.pop_number(operands)?;
980 let y2 = self.pop_number(operands)?;
981 let x2 = self.pop_number(operands)?;
982 ContentOperation::CurveToV(x2, y2, x3, y3)
983 }
984 "y" => {
985 let y3 = self.pop_number(operands)?;
986 let x3 = self.pop_number(operands)?;
987 let y1 = self.pop_number(operands)?;
988 let x1 = self.pop_number(operands)?;
989 ContentOperation::CurveToY(x1, y1, x3, y3)
990 }
991 "h" => ContentOperation::ClosePath,
992 "re" => {
993 let height = self.pop_number(operands)?;
994 let width = self.pop_number(operands)?;
995 let y = self.pop_number(operands)?;
996 let x = self.pop_number(operands)?;
997 ContentOperation::Rectangle(x, y, width, height)
998 }
999
1000 "S" => ContentOperation::Stroke,
1002 "s" => ContentOperation::CloseStroke,
1003 "f" | "F" => ContentOperation::Fill,
1004 "f*" => ContentOperation::FillEvenOdd,
1005 "B" => ContentOperation::FillStroke,
1006 "B*" => ContentOperation::FillStrokeEvenOdd,
1007 "b" => ContentOperation::CloseFillStroke,
1008 "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1009 "n" => ContentOperation::EndPath,
1010
1011 "W" => ContentOperation::Clip,
1013 "W*" => ContentOperation::ClipEvenOdd,
1014
1015 "CS" => {
1017 let name = self.pop_name(operands)?;
1018 ContentOperation::SetStrokingColorSpace(name)
1019 }
1020 "cs" => {
1021 let name = self.pop_name(operands)?;
1022 ContentOperation::SetNonStrokingColorSpace(name)
1023 }
1024 "SC" | "SCN" => {
1025 let components = self.pop_color_components(operands)?;
1026 ContentOperation::SetStrokingColor(components)
1027 }
1028 "sc" | "scn" => {
1029 let components = self.pop_color_components(operands)?;
1030 ContentOperation::SetNonStrokingColor(components)
1031 }
1032 "G" => {
1033 let gray = self.pop_number(operands)?;
1034 ContentOperation::SetStrokingGray(gray)
1035 }
1036 "g" => {
1037 let gray = self.pop_number(operands)?;
1038 ContentOperation::SetNonStrokingGray(gray)
1039 }
1040 "RG" => {
1041 let b = self.pop_number(operands)?;
1042 let g = self.pop_number(operands)?;
1043 let r = self.pop_number(operands)?;
1044 ContentOperation::SetStrokingRGB(r, g, b)
1045 }
1046 "rg" => {
1047 let b = self.pop_number(operands)?;
1048 let g = self.pop_number(operands)?;
1049 let r = self.pop_number(operands)?;
1050 ContentOperation::SetNonStrokingRGB(r, g, b)
1051 }
1052 "K" => {
1053 let k = self.pop_number(operands)?;
1054 let y = self.pop_number(operands)?;
1055 let m = self.pop_number(operands)?;
1056 let c = self.pop_number(operands)?;
1057 ContentOperation::SetStrokingCMYK(c, m, y, k)
1058 }
1059 "k" => {
1060 let k = self.pop_number(operands)?;
1061 let y = self.pop_number(operands)?;
1062 let m = self.pop_number(operands)?;
1063 let c = self.pop_number(operands)?;
1064 ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1065 }
1066
1067 "sh" => {
1069 let name = self.pop_name(operands)?;
1070 ContentOperation::ShadingFill(name)
1071 }
1072
1073 "Do" => {
1075 let name = self.pop_name(operands)?;
1076 ContentOperation::PaintXObject(name)
1077 }
1078
1079 "BMC" => {
1081 let tag = self.pop_name(operands)?;
1082 ContentOperation::BeginMarkedContent(tag)
1083 }
1084 "BDC" => {
1085 let props = self.pop_dict_or_name(operands)?;
1086 let tag = self.pop_name(operands)?;
1087 ContentOperation::BeginMarkedContentWithProps(tag, props)
1088 }
1089 "EMC" => ContentOperation::EndMarkedContent,
1090 "MP" => {
1091 let tag = self.pop_name(operands)?;
1092 ContentOperation::DefineMarkedContentPoint(tag)
1093 }
1094 "DP" => {
1095 let props = self.pop_dict_or_name(operands)?;
1096 let tag = self.pop_name(operands)?;
1097 ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1098 }
1099
1100 "BX" => ContentOperation::BeginCompatibility,
1102 "EX" => ContentOperation::EndCompatibility,
1103
1104 "BI" => {
1106 operands.clear(); self.parse_inline_image()?
1108 }
1109
1110 _ => {
1111 return Err(ParseError::SyntaxError {
1112 position: self.position,
1113 message: format!("Unknown operator: {op}"),
1114 });
1115 }
1116 };
1117
1118 operands.clear(); Ok(operator)
1120 }
1121
1122 fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1124 match operands.pop() {
1125 Some(Token::Number(n)) => Ok(n),
1126 Some(Token::Integer(i)) => Ok(i as f32),
1127 _ => Err(ParseError::SyntaxError {
1128 position: self.position,
1129 message: "Expected number operand".to_string(),
1130 }),
1131 }
1132 }
1133
1134 fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1135 match operands.pop() {
1136 Some(Token::Integer(i)) => Ok(i),
1137 _ => Err(ParseError::SyntaxError {
1138 position: self.position,
1139 message: "Expected integer operand".to_string(),
1140 }),
1141 }
1142 }
1143
1144 fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1145 match operands.pop() {
1146 Some(Token::Name(n)) => Ok(n),
1147 _ => Err(ParseError::SyntaxError {
1148 position: self.position,
1149 message: "Expected name operand".to_string(),
1150 }),
1151 }
1152 }
1153
1154 fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1155 match operands.pop() {
1156 Some(Token::String(s)) => Ok(s),
1157 Some(Token::HexString(s)) => Ok(s),
1158 _ => Err(ParseError::SyntaxError {
1159 position: self.position,
1160 message: "Expected string operand".to_string(),
1161 }),
1162 }
1163 }
1164
1165 fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1166 let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
1168 if has_array_end {
1169 operands.pop(); }
1171
1172 let mut array = Vec::new();
1173 let mut found_start = false;
1174
1175 while let Some(token) = operands.pop() {
1177 match token {
1178 Token::ArrayStart => {
1179 found_start = true;
1180 break;
1181 }
1182 Token::ArrayEnd => {
1183 continue;
1185 }
1186 _ => array.push(token),
1187 }
1188 }
1189
1190 if !found_start {
1191 return Err(ParseError::SyntaxError {
1192 position: self.position,
1193 message: "Expected array".to_string(),
1194 });
1195 }
1196
1197 array.reverse(); Ok(array)
1199 }
1200
1201 fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1202 if let Some(token) = operands.pop() {
1203 match token {
1204 Token::Name(name) => {
1205 let mut props = HashMap::new();
1208 props.insert("__resource_ref".to_string(), name);
1209 Ok(props)
1210 }
1211 Token::DictStart => {
1212 let mut props = HashMap::new();
1214
1215 while let Some(value_token) = operands.pop() {
1217 if matches!(value_token, Token::DictEnd) {
1218 break;
1219 }
1220
1221 if let Token::Name(key) = value_token {
1223 if let Some(value_token) = operands.pop() {
1224 let value = match value_token {
1225 Token::Name(name) => name,
1226 Token::String(s) => String::from_utf8_lossy(&s).to_string(),
1227 Token::Integer(i) => i.to_string(),
1228 Token::Number(f) => f.to_string(),
1229 _ => continue, };
1231 props.insert(key, value);
1232 }
1233 }
1234 }
1235
1236 Ok(props)
1237 }
1238 _ => {
1239 Ok(HashMap::new())
1241 }
1242 }
1243 } else {
1244 Err(ParseError::SyntaxError {
1246 position: 0,
1247 message: "Expected dictionary or name for marked content properties".to_string(),
1248 })
1249 }
1250 }
1251
1252 fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1253 let mut components = Vec::new();
1254
1255 while let Some(token) = operands.last() {
1257 match token {
1258 Token::Number(n) => {
1259 components.push(*n);
1260 operands.pop();
1261 }
1262 Token::Integer(i) => {
1263 components.push(*i as f32);
1264 operands.pop();
1265 }
1266 _ => break,
1267 }
1268 }
1269
1270 components.reverse();
1271 Ok(components)
1272 }
1273
1274 fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1275 let mut elements = Vec::new();
1276
1277 for token in tokens {
1278 match token {
1279 Token::String(s) | Token::HexString(s) => {
1280 elements.push(TextElement::Text(s));
1281 }
1282 Token::Number(n) => {
1283 elements.push(TextElement::Spacing(n));
1284 }
1285 Token::Integer(i) => {
1286 elements.push(TextElement::Spacing(i as f32));
1287 }
1288 _ => {
1289 return Err(ParseError::SyntaxError {
1290 position: self.position,
1291 message: "Invalid element in text array".to_string(),
1292 });
1293 }
1294 }
1295 }
1296
1297 Ok(elements)
1298 }
1299
1300 fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1301 let mut pattern = Vec::new();
1302
1303 for token in tokens {
1304 match token {
1305 Token::Number(n) => pattern.push(n),
1306 Token::Integer(i) => pattern.push(i as f32),
1307 _ => {
1308 return Err(ParseError::SyntaxError {
1309 position: self.position,
1310 message: "Invalid element in dash array".to_string(),
1311 });
1312 }
1313 }
1314 }
1315
1316 Ok(pattern)
1317 }
1318
1319 fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1320 while self.position < self.tokens.len() {
1324 if let Token::Operator(op) = &self.tokens[self.position] {
1325 if op == "EI" {
1326 self.position += 1;
1327 break;
1328 }
1329 }
1330 self.position += 1;
1331 }
1332
1333 Ok(ContentOperation::BeginInlineImage)
1334 }
1335}
1336
1337#[cfg(test)]
1338mod tests {
1339 use super::*;
1340
1341 #[test]
1342 fn test_tokenize_numbers() {
1343 let input = b"123 -45 3.14159 -0.5 .5";
1344 let mut tokenizer = ContentTokenizer::new(input);
1345
1346 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(123)));
1347 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(-45)));
1348 assert_eq!(
1349 tokenizer.next_token().unwrap(),
1350 Some(Token::Number(3.14159))
1351 );
1352 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1353 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1354 assert_eq!(tokenizer.next_token().unwrap(), None);
1355 }
1356
1357 #[test]
1358 fn test_tokenize_strings() {
1359 let input = b"(Hello World) (Hello\\nWorld) (Nested (paren))";
1360 let mut tokenizer = ContentTokenizer::new(input);
1361
1362 assert_eq!(
1363 tokenizer.next_token().unwrap(),
1364 Some(Token::String(b"Hello World".to_vec()))
1365 );
1366 assert_eq!(
1367 tokenizer.next_token().unwrap(),
1368 Some(Token::String(b"Hello\nWorld".to_vec()))
1369 );
1370 assert_eq!(
1371 tokenizer.next_token().unwrap(),
1372 Some(Token::String(b"Nested (paren)".to_vec()))
1373 );
1374 }
1375
1376 #[test]
1377 fn test_tokenize_hex_strings() {
1378 let input = b"<48656C6C6F> <48 65 6C 6C 6F>";
1379 let mut tokenizer = ContentTokenizer::new(input);
1380
1381 assert_eq!(
1382 tokenizer.next_token().unwrap(),
1383 Some(Token::HexString(b"Hello".to_vec()))
1384 );
1385 assert_eq!(
1386 tokenizer.next_token().unwrap(),
1387 Some(Token::HexString(b"Hello".to_vec()))
1388 );
1389 }
1390
1391 #[test]
1392 fn test_tokenize_names() {
1393 let input = b"/Name /Name#20with#20spaces /A#42C";
1394 let mut tokenizer = ContentTokenizer::new(input);
1395
1396 assert_eq!(
1397 tokenizer.next_token().unwrap(),
1398 Some(Token::Name("Name".to_string()))
1399 );
1400 assert_eq!(
1401 tokenizer.next_token().unwrap(),
1402 Some(Token::Name("Name with spaces".to_string()))
1403 );
1404 assert_eq!(
1405 tokenizer.next_token().unwrap(),
1406 Some(Token::Name("ABC".to_string()))
1407 );
1408 }
1409
1410 #[test]
1411 fn test_tokenize_operators() {
1412 let input = b"BT Tj ET q Q";
1413 let mut tokenizer = ContentTokenizer::new(input);
1414
1415 assert_eq!(
1416 tokenizer.next_token().unwrap(),
1417 Some(Token::Operator("BT".to_string()))
1418 );
1419 assert_eq!(
1420 tokenizer.next_token().unwrap(),
1421 Some(Token::Operator("Tj".to_string()))
1422 );
1423 assert_eq!(
1424 tokenizer.next_token().unwrap(),
1425 Some(Token::Operator("ET".to_string()))
1426 );
1427 assert_eq!(
1428 tokenizer.next_token().unwrap(),
1429 Some(Token::Operator("q".to_string()))
1430 );
1431 assert_eq!(
1432 tokenizer.next_token().unwrap(),
1433 Some(Token::Operator("Q".to_string()))
1434 );
1435 }
1436
1437 #[test]
1438 fn test_parse_text_operators() {
1439 let content = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
1440 let operators = ContentParser::parse(content).unwrap();
1441
1442 assert_eq!(operators.len(), 5);
1443 assert_eq!(operators[0], ContentOperation::BeginText);
1444 assert_eq!(
1445 operators[1],
1446 ContentOperation::SetFont("F1".to_string(), 12.0)
1447 );
1448 assert_eq!(operators[2], ContentOperation::MoveText(100.0, 200.0));
1449 assert_eq!(
1450 operators[3],
1451 ContentOperation::ShowText(b"Hello World".to_vec())
1452 );
1453 assert_eq!(operators[4], ContentOperation::EndText);
1454 }
1455
1456 #[test]
1457 fn test_parse_graphics_operators() {
1458 let content = b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q";
1459 let operators = ContentParser::parse(content).unwrap();
1460
1461 assert_eq!(operators.len(), 6);
1462 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1463 assert_eq!(
1464 operators[1],
1465 ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1466 );
1467 assert_eq!(operators[2], ContentOperation::SetLineWidth(2.0));
1468 assert_eq!(
1469 operators[3],
1470 ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0)
1471 );
1472 assert_eq!(operators[4], ContentOperation::Stroke);
1473 assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
1474 }
1475
1476 #[test]
1477 fn test_parse_color_operators() {
1478 let content = b"0.5 g 1 0 0 rg 0 0 0 1 k";
1479 let operators = ContentParser::parse(content).unwrap();
1480
1481 assert_eq!(operators.len(), 3);
1482 assert_eq!(operators[0], ContentOperation::SetNonStrokingGray(0.5));
1483 assert_eq!(
1484 operators[1],
1485 ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0)
1486 );
1487 assert_eq!(
1488 operators[2],
1489 ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1490 );
1491 }
1492
1493 mod comprehensive_tests {
1495 use super::*;
1496
1497 #[test]
1498 fn test_all_text_operators() {
1499 let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
1501 let operators = ContentParser::parse(content).unwrap();
1502
1503 assert_eq!(operators[0], ContentOperation::BeginText);
1504 assert_eq!(operators[1], ContentOperation::SetCharSpacing(5.0));
1505 assert_eq!(operators[2], ContentOperation::SetWordSpacing(10.0));
1506 assert_eq!(operators[3], ContentOperation::SetHorizontalScaling(120.0));
1507 assert_eq!(operators[4], ContentOperation::SetLeading(15.0));
1508 assert_eq!(
1509 operators[5],
1510 ContentOperation::SetFont("F1".to_string(), 12.0)
1511 );
1512 assert_eq!(operators[6], ContentOperation::SetTextRenderMode(1));
1513 assert_eq!(operators[7], ContentOperation::SetTextRise(5.0));
1514 assert_eq!(operators[8], ContentOperation::MoveText(100.0, 200.0));
1515 assert_eq!(
1516 operators[9],
1517 ContentOperation::MoveTextSetLeading(50.0, 150.0)
1518 );
1519 assert_eq!(operators[10], ContentOperation::NextLine);
1520 assert_eq!(operators[11], ContentOperation::ShowText(b"Hello".to_vec()));
1521 assert_eq!(operators[12], ContentOperation::EndText);
1522 }
1523
1524 #[test]
1525 fn test_all_graphics_state_operators() {
1526 let content = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
1528 let operators = ContentParser::parse(content).unwrap();
1529
1530 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1531 assert_eq!(operators[1], ContentOperation::RestoreGraphicsState);
1532 assert_eq!(
1533 operators[2],
1534 ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1535 );
1536 assert_eq!(operators[3], ContentOperation::SetLineWidth(2.0));
1537 assert_eq!(operators[4], ContentOperation::SetLineCap(1));
1538 assert_eq!(operators[5], ContentOperation::SetLineJoin(2));
1539 assert_eq!(operators[6], ContentOperation::SetMiterLimit(10.0));
1540 assert_eq!(
1541 operators[7],
1542 ContentOperation::SetGraphicsStateParams("GS1".to_string())
1543 );
1544 assert_eq!(operators[8], ContentOperation::SetFlatness(0.5));
1545 assert_eq!(
1546 operators[9],
1547 ContentOperation::SetIntent("Perceptual".to_string())
1548 );
1549 }
1550
1551 #[test]
1552 fn test_all_path_construction_operators() {
1553 let content = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
1554 let operators = ContentParser::parse(content).unwrap();
1555
1556 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
1557 assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
1558 assert_eq!(
1559 operators[2],
1560 ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0)
1561 );
1562 assert_eq!(
1563 operators[3],
1564 ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0)
1565 );
1566 assert_eq!(
1567 operators[4],
1568 ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0)
1569 );
1570 assert_eq!(operators[5], ContentOperation::ClosePath);
1571 assert_eq!(
1572 operators[6],
1573 ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0)
1574 );
1575 }
1576
1577 #[test]
1578 fn test_all_path_painting_operators() {
1579 let content = b"S s f F f* B B* b b* n W W*";
1580 let operators = ContentParser::parse(content).unwrap();
1581
1582 assert_eq!(operators[0], ContentOperation::Stroke);
1583 assert_eq!(operators[1], ContentOperation::CloseStroke);
1584 assert_eq!(operators[2], ContentOperation::Fill);
1585 assert_eq!(operators[3], ContentOperation::Fill); assert_eq!(operators[4], ContentOperation::FillEvenOdd);
1587 assert_eq!(operators[5], ContentOperation::FillStroke);
1588 assert_eq!(operators[6], ContentOperation::FillStrokeEvenOdd);
1589 assert_eq!(operators[7], ContentOperation::CloseFillStroke);
1590 assert_eq!(operators[8], ContentOperation::CloseFillStrokeEvenOdd);
1591 assert_eq!(operators[9], ContentOperation::EndPath);
1592 assert_eq!(operators[10], ContentOperation::Clip);
1593 assert_eq!(operators[11], ContentOperation::ClipEvenOdd);
1594 }
1595
1596 #[test]
1597 fn test_all_color_operators() {
1598 let content = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
1600 let operators = ContentParser::parse(content).unwrap();
1601
1602 assert_eq!(
1603 operators[0],
1604 ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string())
1605 );
1606 assert_eq!(
1607 operators[1],
1608 ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string())
1609 );
1610 assert_eq!(operators[2], ContentOperation::SetStrokingGray(0.7));
1611 assert_eq!(operators[3], ContentOperation::SetNonStrokingGray(0.4));
1612 assert_eq!(
1613 operators[4],
1614 ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0)
1615 );
1616 assert_eq!(
1617 operators[5],
1618 ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0)
1619 );
1620 assert_eq!(
1621 operators[6],
1622 ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1623 );
1624 assert_eq!(
1625 operators[7],
1626 ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5)
1627 );
1628 assert_eq!(
1629 operators[8],
1630 ContentOperation::ShadingFill("Shade1".to_string())
1631 );
1632 }
1633
1634 #[test]
1635 fn test_xobject_and_marked_content_operators() {
1636 let content = b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX";
1638 let operators = ContentParser::parse(content).unwrap();
1639
1640 assert_eq!(
1641 operators[0],
1642 ContentOperation::PaintXObject("Image1".to_string())
1643 );
1644 assert_eq!(
1645 operators[1],
1646 ContentOperation::BeginMarkedContent("MC1".to_string())
1647 );
1648 assert_eq!(operators[2], ContentOperation::EndMarkedContent);
1649 assert_eq!(
1650 operators[3],
1651 ContentOperation::DefineMarkedContentPoint("MP1".to_string())
1652 );
1653 assert_eq!(operators[4], ContentOperation::BeginCompatibility);
1654 assert_eq!(operators[5], ContentOperation::EndCompatibility);
1655 }
1656
1657 #[test]
1658 fn test_complex_content_stream() {
1659 let content = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
1660 let operators = ContentParser::parse(content).unwrap();
1661
1662 assert_eq!(operators.len(), 8);
1663 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1664 assert_eq!(
1665 operators[1],
1666 ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0)
1667 );
1668 assert_eq!(operators[2], ContentOperation::BeginText);
1669 assert_eq!(
1670 operators[3],
1671 ContentOperation::SetFont("F1".to_string(), 12.0)
1672 );
1673 assert_eq!(operators[4], ContentOperation::MoveText(0.0, 0.0));
1674 assert_eq!(
1675 operators[5],
1676 ContentOperation::ShowText(b"Complex".to_vec())
1677 );
1678 assert_eq!(operators[6], ContentOperation::EndText);
1679 assert_eq!(operators[7], ContentOperation::RestoreGraphicsState);
1680 }
1681
1682 #[test]
1683 fn test_tokenizer_whitespace_handling() {
1684 let input = b" \t\n\r BT \t\n /F1 12.5 \t Tf \n\r ET ";
1685 let mut tokenizer = ContentTokenizer::new(input);
1686
1687 assert_eq!(
1688 tokenizer.next_token().unwrap(),
1689 Some(Token::Operator("BT".to_string()))
1690 );
1691 assert_eq!(
1692 tokenizer.next_token().unwrap(),
1693 Some(Token::Name("F1".to_string()))
1694 );
1695 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(12.5)));
1696 assert_eq!(
1697 tokenizer.next_token().unwrap(),
1698 Some(Token::Operator("Tf".to_string()))
1699 );
1700 assert_eq!(
1701 tokenizer.next_token().unwrap(),
1702 Some(Token::Operator("ET".to_string()))
1703 );
1704 assert_eq!(tokenizer.next_token().unwrap(), None);
1705 }
1706
1707 #[test]
1708 fn test_tokenizer_edge_cases() {
1709 let input = b"0 .5 -.5 +.5 123. .123 1.23 -1.23";
1711 let mut tokenizer = ContentTokenizer::new(input);
1712
1713 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(0)));
1714 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1715 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1716 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1717 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(123.0)));
1718 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.123)));
1719 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(1.23)));
1720 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-1.23)));
1721 }
1722
1723 #[test]
1724 fn test_string_parsing_edge_cases() {
1725 let input = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
1726 let mut tokenizer = ContentTokenizer::new(input);
1727
1728 assert_eq!(
1729 tokenizer.next_token().unwrap(),
1730 Some(Token::String(b"Simple".to_vec()))
1731 );
1732 assert_eq!(
1733 tokenizer.next_token().unwrap(),
1734 Some(Token::String(b"With\\backslash".to_vec()))
1735 );
1736 assert_eq!(
1737 tokenizer.next_token().unwrap(),
1738 Some(Token::String(b"With)paren".to_vec()))
1739 );
1740 assert_eq!(
1741 tokenizer.next_token().unwrap(),
1742 Some(Token::String(b"With\newline".to_vec()))
1743 );
1744 assert_eq!(
1745 tokenizer.next_token().unwrap(),
1746 Some(Token::String(b"With\ttab".to_vec()))
1747 );
1748 assert_eq!(
1749 tokenizer.next_token().unwrap(),
1750 Some(Token::String(b"With\rcarriage".to_vec()))
1751 );
1752 assert_eq!(
1753 tokenizer.next_token().unwrap(),
1754 Some(Token::String(b"With\x08backspace".to_vec()))
1755 );
1756 assert_eq!(
1757 tokenizer.next_token().unwrap(),
1758 Some(Token::String(b"With\x0Cformfeed".to_vec()))
1759 );
1760 assert_eq!(
1761 tokenizer.next_token().unwrap(),
1762 Some(Token::String(b"With(leftparen".to_vec()))
1763 );
1764 assert_eq!(
1765 tokenizer.next_token().unwrap(),
1766 Some(Token::String(b"With)rightparen".to_vec()))
1767 );
1768 }
1769
1770 #[test]
1771 fn test_hex_string_parsing() {
1772 let input = b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>";
1773 let mut tokenizer = ContentTokenizer::new(input);
1774
1775 assert_eq!(
1776 tokenizer.next_token().unwrap(),
1777 Some(Token::HexString(b"Hello".to_vec()))
1778 );
1779 assert_eq!(
1780 tokenizer.next_token().unwrap(),
1781 Some(Token::HexString(b"Hello".to_vec()))
1782 );
1783 assert_eq!(
1784 tokenizer.next_token().unwrap(),
1785 Some(Token::HexString(b"HelloW".to_vec()))
1786 );
1787 assert_eq!(
1788 tokenizer.next_token().unwrap(),
1789 Some(Token::HexString(b"Hello\x50".to_vec()))
1790 );
1791 }
1792
1793 #[test]
1794 fn test_name_parsing_edge_cases() {
1795 let input = b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
1796 let mut tokenizer = ContentTokenizer::new(input);
1797
1798 assert_eq!(
1799 tokenizer.next_token().unwrap(),
1800 Some(Token::Name("Name".to_string()))
1801 );
1802 assert_eq!(
1803 tokenizer.next_token().unwrap(),
1804 Some(Token::Name("Name with spaces".to_string()))
1805 );
1806 assert_eq!(
1807 tokenizer.next_token().unwrap(),
1808 Some(Token::Name("Name#with#hash".to_string()))
1809 );
1810 assert_eq!(
1811 tokenizer.next_token().unwrap(),
1812 Some(Token::Name("Name/with/slash".to_string()))
1813 );
1814 assert_eq!(
1815 tokenizer.next_token().unwrap(),
1816 Some(Token::Name("EmptyName".to_string()))
1817 );
1818 }
1819
1820 #[test]
1821 fn test_operator_parsing_edge_cases() {
1822 let content = b"q q q Q Q Q BT BT ET ET";
1823 let operators = ContentParser::parse(content).unwrap();
1824
1825 assert_eq!(operators.len(), 10);
1826 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1827 assert_eq!(operators[1], ContentOperation::SaveGraphicsState);
1828 assert_eq!(operators[2], ContentOperation::SaveGraphicsState);
1829 assert_eq!(operators[3], ContentOperation::RestoreGraphicsState);
1830 assert_eq!(operators[4], ContentOperation::RestoreGraphicsState);
1831 assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
1832 assert_eq!(operators[6], ContentOperation::BeginText);
1833 assert_eq!(operators[7], ContentOperation::BeginText);
1834 assert_eq!(operators[8], ContentOperation::EndText);
1835 assert_eq!(operators[9], ContentOperation::EndText);
1836 }
1837
1838 #[test]
1839 fn test_error_handling_insufficient_operands() {
1840 let content = b"100 Td"; let result = ContentParser::parse(content);
1842 assert!(result.is_err());
1843 }
1844
1845 #[test]
1846 fn test_error_handling_invalid_operator() {
1847 let content = b"100 200 INVALID";
1848 let result = ContentParser::parse(content);
1849 assert!(result.is_err());
1850 }
1851
1852 #[test]
1853 fn test_error_handling_malformed_string() {
1854 let input = b"(Unclosed string";
1856 let mut tokenizer = ContentTokenizer::new(input);
1857 let result = tokenizer.next_token();
1858 assert!(result.is_ok() || result.is_err());
1861 }
1862
1863 #[test]
1864 fn test_error_handling_malformed_hex_string() {
1865 let input = b"<48656C6C6G>";
1866 let mut tokenizer = ContentTokenizer::new(input);
1867 let result = tokenizer.next_token();
1868 assert!(result.is_err());
1869 }
1870
1871 #[test]
1872 fn test_error_handling_malformed_name() {
1873 let input = b"/Name#GG";
1874 let mut tokenizer = ContentTokenizer::new(input);
1875 let result = tokenizer.next_token();
1876 assert!(result.is_err());
1877 }
1878
1879 #[test]
1880 fn test_empty_content_stream() {
1881 let content = b"";
1882 let operators = ContentParser::parse(content).unwrap();
1883 assert_eq!(operators.len(), 0);
1884 }
1885
1886 #[test]
1887 fn test_whitespace_only_content_stream() {
1888 let content = b" \t\n\r ";
1889 let operators = ContentParser::parse(content).unwrap();
1890 assert_eq!(operators.len(), 0);
1891 }
1892
1893 #[test]
1894 fn test_mixed_integer_and_real_operands() {
1895 let content = b"100 200 m 150 200 l";
1897 let operators = ContentParser::parse(content).unwrap();
1898
1899 assert_eq!(operators.len(), 2);
1900 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
1901 assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
1902 }
1903
1904 #[test]
1905 fn test_negative_operands() {
1906 let content = b"-100 -200 Td -50.5 -75.2 TD";
1907 let operators = ContentParser::parse(content).unwrap();
1908
1909 assert_eq!(operators.len(), 2);
1910 assert_eq!(operators[0], ContentOperation::MoveText(-100.0, -200.0));
1911 assert_eq!(
1912 operators[1],
1913 ContentOperation::MoveTextSetLeading(-50.5, -75.2)
1914 );
1915 }
1916
1917 #[test]
1918 fn test_large_numbers() {
1919 let content = b"999999.999999 -999999.999999 m";
1920 let operators = ContentParser::parse(content).unwrap();
1921
1922 assert_eq!(operators.len(), 1);
1923 assert_eq!(
1924 operators[0],
1925 ContentOperation::MoveTo(999999.999999, -999999.999999)
1926 );
1927 }
1928
1929 #[test]
1930 fn test_scientific_notation() {
1931 let content = b"123.45 -456.78 m";
1933 let operators = ContentParser::parse(content).unwrap();
1934
1935 assert_eq!(operators.len(), 1);
1936 assert_eq!(operators[0], ContentOperation::MoveTo(123.45, -456.78));
1937 }
1938
1939 #[test]
1940 fn test_show_text_array_complex() {
1941 let content = b"(Hello) TJ";
1943 let result = ContentParser::parse(content);
1944 assert!(result.is_err());
1946 }
1947
1948 #[test]
1949 fn test_dash_pattern_empty() {
1950 let content = b"0 d";
1952 let result = ContentParser::parse(content);
1953 assert!(result.is_err());
1955 }
1956
1957 #[test]
1958 fn test_dash_pattern_complex() {
1959 let content = b"2.5 d";
1961 let result = ContentParser::parse(content);
1962 assert!(result.is_err());
1964 }
1965
1966 #[test]
1967 fn test_pop_array_removes_array_end() {
1968 let parser = ContentParser::new(b"");
1970
1971 let mut operands = vec![
1973 Token::ArrayStart,
1974 Token::Integer(1),
1975 Token::Integer(2),
1976 Token::Integer(3),
1977 Token::ArrayEnd,
1978 ];
1979 let result = parser.pop_array(&mut operands).unwrap();
1980 assert_eq!(result.len(), 3);
1981 assert!(operands.is_empty());
1982
1983 let mut operands = vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)];
1985 let result = parser.pop_array(&mut operands).unwrap();
1986 assert_eq!(result.len(), 2);
1987 assert!(operands.is_empty());
1988 }
1989
1990 #[test]
1991 fn test_dash_array_parsing_valid() {
1992 let parser = ContentParser::new(b"");
1994
1995 let valid_tokens = vec![Token::Number(3.0), Token::Integer(2)];
1997 let result = parser.parse_dash_array(valid_tokens).unwrap();
1998 assert_eq!(result, vec![3.0, 2.0]);
1999
2000 let empty_tokens = vec![];
2002 let result = parser.parse_dash_array(empty_tokens).unwrap();
2003 let expected: Vec<f32> = vec![];
2004 assert_eq!(result, expected);
2005 }
2006
2007 #[test]
2008 fn test_text_array_parsing_valid() {
2009 let parser = ContentParser::new(b"");
2011
2012 let valid_tokens = vec![
2014 Token::String(b"Hello".to_vec()),
2015 Token::Number(-100.0),
2016 Token::String(b"World".to_vec()),
2017 ];
2018 let result = parser.parse_text_array(valid_tokens).unwrap();
2019 assert_eq!(result.len(), 3);
2020 }
2021
2022 #[test]
2023 fn test_inline_image_handling() {
2024 let content = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
2025 let operators = ContentParser::parse(content).unwrap();
2026
2027 assert_eq!(operators.len(), 1);
2028 assert_eq!(operators[0], ContentOperation::BeginInlineImage);
2029 }
2030
2031 #[test]
2032 fn test_content_parser_performance() {
2033 let mut content = Vec::new();
2034 for i in 0..1000 {
2035 content.extend_from_slice(format!("{} {} m ", i, i + 1).as_bytes());
2036 }
2037
2038 let start = std::time::Instant::now();
2039 let operators = ContentParser::parse(&content).unwrap();
2040 let duration = start.elapsed();
2041
2042 assert_eq!(operators.len(), 1000);
2043 assert!(duration.as_millis() < 100); }
2045
2046 #[test]
2047 fn test_tokenizer_performance() {
2048 let mut input = Vec::new();
2049 for i in 0..1000 {
2050 input.extend_from_slice(format!("{} {} ", i, i + 1).as_bytes());
2051 }
2052
2053 let start = std::time::Instant::now();
2054 let mut tokenizer = ContentTokenizer::new(&input);
2055 let mut count = 0;
2056 while tokenizer.next_token().unwrap().is_some() {
2057 count += 1;
2058 }
2059 let duration = start.elapsed();
2060
2061 assert_eq!(count, 2000); assert!(duration.as_millis() < 50); }
2064
2065 #[test]
2066 fn test_memory_usage_large_content() {
2067 let mut content = Vec::new();
2068 for i in 0..10000 {
2069 content.extend_from_slice(
2070 format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5)
2071 .as_bytes(),
2072 );
2073 }
2074
2075 let operators = ContentParser::parse(&content).unwrap();
2076 assert_eq!(operators.len(), 10000);
2077
2078 for op in operators {
2080 matches!(op, ContentOperation::CurveTo(_, _, _, _, _, _));
2081 }
2082 }
2083
2084 #[test]
2085 fn test_concurrent_parsing() {
2086 use std::sync::Arc;
2087 use std::thread;
2088
2089 let content = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());
2090 let handles: Vec<_> = (0..10)
2091 .map(|_| {
2092 let content_clone = content.clone();
2093 thread::spawn(move || ContentParser::parse(&content_clone).unwrap())
2094 })
2095 .collect();
2096
2097 for handle in handles {
2098 let operators = handle.join().unwrap();
2099 assert_eq!(operators.len(), 5);
2100 assert_eq!(operators[0], ContentOperation::BeginText);
2101 assert_eq!(operators[4], ContentOperation::EndText);
2102 }
2103 }
2104 }
2105}