1use super::{ParseError, ParseResult};
57use crate::objects::Object;
58use std::collections::HashMap;
59
60#[derive(Debug, Clone, PartialEq)]
94pub enum ContentOperation {
95 BeginText,
99
100 EndText,
103
104 SetCharSpacing(f32),
108
109 SetWordSpacing(f32),
112
113 SetHorizontalScaling(f32),
116
117 SetLeading(f32),
120
121 SetFont(String, f32),
124
125 SetTextRenderMode(i32),
128
129 SetTextRise(f32),
132
133 MoveText(f32, f32),
137
138 MoveTextSetLeading(f32, f32),
141
142 SetTextMatrix(f32, f32, f32, f32, f32, f32),
145
146 NextLine,
149
150 ShowText(Vec<u8>),
154
155 ShowTextArray(Vec<TextElement>),
158
159 NextLineShowText(Vec<u8>),
162
163 SetSpacingNextLineShowText(f32, f32, Vec<u8>),
166
167 SaveGraphicsState,
171
172 RestoreGraphicsState,
175
176 SetTransformMatrix(f32, f32, f32, f32, f32, f32),
179
180 SetLineWidth(f32),
182
183 SetLineCap(i32),
186
187 SetLineJoin(i32),
190
191 SetMiterLimit(f32),
194
195 SetDashPattern(Vec<f32>, f32),
198
199 SetIntent(String),
202
203 SetFlatness(f32),
206
207 SetGraphicsStateParams(String),
210
211 MoveTo(f32, f32),
214
215 LineTo(f32, f32),
217
218 CurveTo(f32, f32, f32, f32, f32, f32),
221
222 CurveToV(f32, f32, f32, f32),
224
225 CurveToY(f32, f32, f32, f32),
227
228 ClosePath,
231
232 Rectangle(f32, f32, f32, f32),
235
236 Stroke,
239
240 CloseStroke,
243
244 Fill,
246
247 FillEvenOdd,
249
250 FillStroke,
253
254 FillStrokeEvenOdd,
256
257 CloseFillStroke,
260
261 CloseFillStrokeEvenOdd,
263
264 EndPath,
267
268 Clip, ClipEvenOdd, SetStrokingColorSpace(String),
276
277 SetNonStrokingColorSpace(String),
280
281 SetStrokingColor(Vec<f32>),
284
285 SetNonStrokingColor(Vec<f32>),
288
289 SetStrokingGray(f32),
292
293 SetNonStrokingGray(f32),
295
296 SetStrokingRGB(f32, f32, f32),
299
300 SetNonStrokingRGB(f32, f32, f32),
302
303 SetStrokingCMYK(f32, f32, f32, f32),
305
306 SetNonStrokingCMYK(f32, f32, f32, f32),
308
309 ShadingFill(String), BeginInlineImage,
315 InlineImage {
317 params: HashMap<String, Object>,
319 data: Vec<u8>,
321 },
322
323 PaintXObject(String),
327
328 BeginMarkedContent(String), BeginMarkedContentWithProps(String, HashMap<String, String>), EndMarkedContent, DefineMarkedContentPoint(String), DefineMarkedContentPointWithProps(String, HashMap<String, String>), BeginCompatibility, EndCompatibility, }
339
/// One entry of the array operand consumed by the `TJ` operator.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// String bytes, taken from a literal or hexadecimal string token.
    Text(Vec<u8>),
    /// Numeric entry; integer tokens are widened to `f32`.
    Spacing(f32),
}
366
367#[derive(Debug, Clone, PartialEq)]
369pub(super) enum Token {
370 Number(f32),
371 Integer(i32),
372 String(Vec<u8>),
373 HexString(Vec<u8>),
374 Name(String),
375 Operator(String),
376 ArrayStart,
377 ArrayEnd,
378 DictStart,
379 DictEnd,
380 InlineImageData(Vec<u8>),
384}
385
/// Byte-oriented tokenizer over a content stream.
pub struct ContentTokenizer<'a> {
    /// The complete content stream being scanned.
    input: &'a [u8],
    /// Byte offset of the next unread byte in `input`.
    position: usize,
    /// Set right after an `ID` operator has been returned, so that the next
    /// call reads raw inline-image bytes instead of ordinary tokens.
    in_inline_image: bool,
}
394
395impl<'a> ContentTokenizer<'a> {
396 pub fn new(input: &'a [u8]) -> Self {
398 Self {
399 input,
400 position: 0,
401 in_inline_image: false,
402 }
403 }
404
405 pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
407 if self.in_inline_image {
409 self.in_inline_image = false;
410 return self.read_inline_image_data();
411 }
412
413 self.skip_whitespace();
414
415 if self.position >= self.input.len() {
416 return Ok(None);
417 }
418
419 let ch = self.input[self.position];
420
421 match ch {
422 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
424
425 b'(' => self.read_literal_string(),
427 b'<' => {
428 if self.peek_next() == Some(b'<') {
429 self.position += 2;
430 Ok(Some(Token::DictStart))
431 } else {
432 self.read_hex_string()
433 }
434 }
435 b'>' => {
436 if self.peek_next() == Some(b'>') {
437 self.position += 2;
438 Ok(Some(Token::DictEnd))
439 } else {
440 Err(ParseError::SyntaxError {
441 position: self.position,
442 message: "Unexpected '>'".to_string(),
443 })
444 }
445 }
446
447 b'[' => {
449 self.position += 1;
450 Ok(Some(Token::ArrayStart))
451 }
452 b']' => {
453 self.position += 1;
454 Ok(Some(Token::ArrayEnd))
455 }
456
457 b'/' => self.read_name(),
459
460 b';' | b')' | b'{' | b'}' => {
465 self.position += 1;
466 self.next_token() }
468
469 _ => {
471 let token = self.read_operator()?;
472 if let Some(Token::Operator(ref op)) = token {
474 if op == "ID" {
475 self.in_inline_image = true;
476 }
477 }
478 Ok(token)
479 }
480 }
481 }
482
483 fn skip_whitespace(&mut self) {
484 while self.position < self.input.len() {
485 match self.input[self.position] {
486 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
487 b'%' => self.skip_comment(),
488 _ => break,
489 }
490 }
491 }
492
493 fn skip_comment(&mut self) {
494 while self.position < self.input.len() && self.input[self.position] != b'\n' {
495 self.position += 1;
496 }
497 }
498
499 fn peek_next(&self) -> Option<u8> {
500 if self.position + 1 < self.input.len() {
501 Some(self.input[self.position + 1])
502 } else {
503 None
504 }
505 }
506
507 fn read_number(&mut self) -> ParseResult<Option<Token>> {
508 let start = self.position;
509 let mut has_dot = false;
510
511 if self.position < self.input.len()
513 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
514 {
515 self.position += 1;
516 }
517
518 while self.position < self.input.len() {
520 match self.input[self.position] {
521 b'0'..=b'9' => self.position += 1,
522 b'.' if !has_dot => {
523 has_dot = true;
524 self.position += 1;
525 }
526 _ => break,
527 }
528 }
529
530 let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
531 ParseError::SyntaxError {
532 position: start,
533 message: "Invalid number format".to_string(),
534 }
535 })?;
536
537 if has_dot {
538 let value = num_str
539 .parse::<f32>()
540 .map_err(|_| ParseError::SyntaxError {
541 position: start,
542 message: "Invalid float number".to_string(),
543 })?;
544 Ok(Some(Token::Number(value)))
545 } else {
546 let value = num_str
547 .parse::<i32>()
548 .map_err(|_| ParseError::SyntaxError {
549 position: start,
550 message: "Invalid integer number".to_string(),
551 })?;
552 Ok(Some(Token::Integer(value)))
553 }
554 }
555
556 fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
557 self.position += 1; let mut result = Vec::new();
559 let mut paren_depth = 1;
560 let mut escape = false;
561
562 while self.position < self.input.len() && paren_depth > 0 {
563 let ch = self.input[self.position];
564 self.position += 1;
565
566 if escape {
567 match ch {
568 b'n' => result.push(b'\n'),
569 b'r' => result.push(b'\r'),
570 b't' => result.push(b'\t'),
571 b'b' => result.push(b'\x08'),
572 b'f' => result.push(b'\x0C'),
573 b'(' => result.push(b'('),
574 b')' => result.push(b')'),
575 b'\\' => result.push(b'\\'),
576 b'0'..=b'7' => {
577 self.position -= 1;
579 let octal_value = self.read_octal_escape()?;
580 result.push(octal_value);
581 }
582 _ => result.push(ch), }
584 escape = false;
585 } else {
586 match ch {
587 b'\\' => escape = true,
588 b'(' => {
589 paren_depth += 1;
590 result.push(ch);
591 }
592 b')' => {
593 paren_depth -= 1;
594 if paren_depth > 0 {
595 result.push(ch);
596 }
597 }
598 _ => result.push(ch),
599 }
600 }
601 }
602
603 Ok(Some(Token::String(result)))
604 }
605
606 fn read_octal_escape(&mut self) -> ParseResult<u8> {
607 let mut value = 0u16;
610 let mut count = 0;
611
612 while count < 3 && self.position < self.input.len() {
613 match self.input[self.position] {
614 b'0'..=b'7' => {
615 value = value * 8 + u16::from(self.input[self.position] - b'0');
616 self.position += 1;
617 count += 1;
618 }
619 _ => break,
620 }
621 }
622
623 Ok(value as u8)
624 }
625
626 fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
627 self.position += 1; let mut result = Vec::new();
629 let mut nibble = None;
630
631 while self.position < self.input.len() {
632 let ch = self.input[self.position];
633
634 match ch {
635 b'>' => {
636 self.position += 1;
637 if let Some(n) = nibble {
639 result.push(n << 4);
640 }
641 return Ok(Some(Token::HexString(result)));
642 }
643 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
644 let digit = if ch <= b'9' {
645 ch - b'0'
646 } else if ch <= b'F' {
647 ch - b'A' + 10
648 } else {
649 ch - b'a' + 10
650 };
651
652 if let Some(n) = nibble {
653 result.push((n << 4) | digit);
654 nibble = None;
655 } else {
656 nibble = Some(digit);
657 }
658 self.position += 1;
659 }
660 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
661 self.position += 1;
663 }
664 _ => {
665 return Err(ParseError::SyntaxError {
666 position: self.position,
667 message: format!("Invalid character in hex string: {:?}", ch as char),
668 });
669 }
670 }
671 }
672
673 Err(ParseError::SyntaxError {
674 position: self.position,
675 message: "Unterminated hex string".to_string(),
676 })
677 }
678
679 fn read_name(&mut self) -> ParseResult<Option<Token>> {
680 self.position += 1; let start = self.position;
682
683 while self.position < self.input.len() {
684 let ch = self.input[self.position];
685 match ch {
686 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
687 | b']' | b'{' | b'}' | b'/' | b'%' => break,
688 b'#' => {
689 self.position += 1;
691 if self.position + 1 < self.input.len() {
692 self.position += 2;
693 }
694 }
695 _ => self.position += 1,
696 }
697 }
698
699 let name_bytes = &self.input[start..self.position];
700 let name = self.decode_name(name_bytes)?;
701 Ok(Some(Token::Name(name)))
702 }
703
704 fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
705 let mut result = Vec::new();
706 let mut i = 0;
707
708 while i < bytes.len() {
709 if bytes[i] == b'#' && i + 2 < bytes.len() {
710 let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
712 ParseError::SyntaxError {
713 position: self.position,
714 message: "Invalid hex escape in name".to_string(),
715 }
716 })?;
717 let value =
718 u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
719 position: self.position,
720 message: "Invalid hex escape in name".to_string(),
721 })?;
722 result.push(value);
723 i += 3;
724 } else {
725 result.push(bytes[i]);
726 i += 1;
727 }
728 }
729
730 String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
731 position: self.position,
732 message: "Invalid UTF-8 in name".to_string(),
733 })
734 }
735
736 fn read_operator(&mut self) -> ParseResult<Option<Token>> {
737 let start = self.position;
738
739 while self.position < self.input.len() {
740 let ch = self.input[self.position];
741 match ch {
742 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
743 | b']' | b'{' | b'}' | b'/' | b'%' | b';' => break,
744 _ => self.position += 1,
745 }
746 }
747
748 let op_bytes = &self.input[start..self.position];
749 let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
750 position: start,
751 message: "Invalid operator".to_string(),
752 })?;
753
754 Ok(Some(Token::Operator(op.to_string())))
755 }
756
757 fn read_inline_image_data(&mut self) -> ParseResult<Option<Token>> {
763 if self.position < self.input.len() {
765 let ch = self.input[self.position];
766 if ch == b' ' || ch == b'\n' || ch == b'\r' || ch == b'\t' {
767 self.position += 1;
768 if ch == b'\r'
770 && self.position < self.input.len()
771 && self.input[self.position] == b'\n'
772 {
773 self.position += 1;
774 }
775 }
776 }
777
778 let start = self.position;
779
780 while self.position + 1 < self.input.len() {
782 let preceded_by_whitespace = self.position == start
783 || matches!(
784 self.input[self.position - 1],
785 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C'
786 );
787
788 if preceded_by_whitespace
789 && self.input[self.position] == b'E'
790 && self.input[self.position + 1] == b'I'
791 {
792 let after_ei = self.position + 2;
793 let followed_by_boundary = after_ei >= self.input.len()
794 || matches!(
795 self.input[after_ei],
796 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'/' | b'<' | b'(' | b'[' | b'%'
797 );
798
799 if followed_by_boundary {
800 let mut end = self.position;
802 if end > start
803 && matches!(self.input[end - 1], b' ' | b'\t' | b'\r' | b'\n' | b'\x0C')
804 {
805 end -= 1;
806 }
807 let data = self.input[start..end].to_vec();
808 self.position = after_ei; return Ok(Some(Token::InlineImageData(data)));
810 }
811 }
812 self.position += 1;
813 }
814
815 let data = self.input[start..].to_vec();
817 self.position = self.input.len();
818 Ok(Some(Token::InlineImageData(data)))
819 }
820}
821
822pub struct ContentParser {
841 tokens: Vec<Token>,
842 position: usize,
843}
844
845impl ContentParser {
846 pub fn new(_content: &[u8]) -> Self {
848 Self {
849 tokens: Vec::new(),
850 position: 0,
851 }
852 }
853
854 pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
888 Self::parse_content(content)
889 }
890
891 pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
896 let mut tokenizer = ContentTokenizer::new(content);
897 let mut tokens = Vec::new();
898
899 while let Some(token) = tokenizer.next_token()? {
901 tokens.push(token);
902 }
903
904 let mut parser = Self {
905 tokens,
906 position: 0,
907 };
908
909 parser.parse_operators()
910 }
911
912 fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
913 let mut operators = Vec::new();
914 let mut operand_stack: Vec<Token> = Vec::new();
915
916 while self.position < self.tokens.len() {
917 let token = self.tokens[self.position].clone();
918 self.position += 1;
919
920 match &token {
921 Token::Operator(op) => {
922 let operator = self.parse_operator(op, &mut operand_stack)?;
923 operators.push(operator);
924 }
925 _ => {
926 operand_stack.push(token);
928 }
929 }
930 }
931
932 Ok(operators)
933 }
934
935 fn parse_operator(
936 &mut self,
937 op: &str,
938 operands: &mut Vec<Token>,
939 ) -> ParseResult<ContentOperation> {
940 let operator = match op {
941 "BT" => ContentOperation::BeginText,
943 "ET" => ContentOperation::EndText,
944
945 "Tc" => {
947 let spacing = self.pop_number(operands)?;
948 ContentOperation::SetCharSpacing(spacing)
949 }
950 "Tw" => {
951 let spacing = self.pop_number(operands)?;
952 ContentOperation::SetWordSpacing(spacing)
953 }
954 "Tz" => {
955 let scale = self.pop_number(operands)?;
956 ContentOperation::SetHorizontalScaling(scale)
957 }
958 "TL" => {
959 let leading = self.pop_number(operands)?;
960 ContentOperation::SetLeading(leading)
961 }
962 "Tf" => {
963 let size = self.pop_number(operands)?;
964 let font = self.pop_name(operands)?;
965 ContentOperation::SetFont(font, size)
966 }
967 "Tr" => {
968 let mode = self.pop_integer(operands)?;
969 ContentOperation::SetTextRenderMode(mode)
970 }
971 "Ts" => {
972 let rise = self.pop_number(operands)?;
973 ContentOperation::SetTextRise(rise)
974 }
975
976 "Td" => {
978 let ty = self.pop_number(operands)?;
979 let tx = self.pop_number(operands)?;
980 ContentOperation::MoveText(tx, ty)
981 }
982 "TD" => {
983 let ty = self.pop_number(operands)?;
984 let tx = self.pop_number(operands)?;
985 ContentOperation::MoveTextSetLeading(tx, ty)
986 }
987 "Tm" => {
988 let f = self.pop_number(operands)?;
989 let e = self.pop_number(operands)?;
990 let d = self.pop_number(operands)?;
991 let c = self.pop_number(operands)?;
992 let b = self.pop_number(operands)?;
993 let a = self.pop_number(operands)?;
994 ContentOperation::SetTextMatrix(a, b, c, d, e, f)
995 }
996 "T*" => ContentOperation::NextLine,
997
998 "Tj" => {
1000 let text = self.pop_string(operands)?;
1001 ContentOperation::ShowText(text)
1002 }
1003 "TJ" => {
1004 let array = self.pop_array(operands)?;
1005 let elements = self.parse_text_array(array)?;
1006 ContentOperation::ShowTextArray(elements)
1007 }
1008 "'" => {
1009 let text = self.pop_string(operands)?;
1010 ContentOperation::NextLineShowText(text)
1011 }
1012 "\"" => {
1013 let text = self.pop_string(operands)?;
1019 let ac = self.pop_number(operands)?;
1020 let aw = self.pop_number(operands)?;
1021 ContentOperation::SetSpacingNextLineShowText(aw, ac, text)
1022 }
1023
1024 "q" => ContentOperation::SaveGraphicsState,
1026 "Q" => ContentOperation::RestoreGraphicsState,
1027 "cm" => {
1028 let f = self.pop_number(operands)?;
1029 let e = self.pop_number(operands)?;
1030 let d = self.pop_number(operands)?;
1031 let c = self.pop_number(operands)?;
1032 let b = self.pop_number(operands)?;
1033 let a = self.pop_number(operands)?;
1034 ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
1035 }
1036 "w" => {
1037 let width = self.pop_number(operands)?;
1038 ContentOperation::SetLineWidth(width)
1039 }
1040 "J" => {
1041 let cap = self.pop_integer(operands)?;
1042 ContentOperation::SetLineCap(cap)
1043 }
1044 "j" => {
1045 let join = self.pop_integer(operands)?;
1046 ContentOperation::SetLineJoin(join)
1047 }
1048 "M" => {
1049 let limit = self.pop_number(operands)?;
1050 ContentOperation::SetMiterLimit(limit)
1051 }
1052 "d" => {
1053 let phase = self.pop_number(operands)?;
1054 let array = self.pop_array(operands)?;
1055 let pattern = self.parse_dash_array(array)?;
1056 ContentOperation::SetDashPattern(pattern, phase)
1057 }
1058 "ri" => {
1059 let intent = self.pop_name(operands)?;
1060 ContentOperation::SetIntent(intent)
1061 }
1062 "i" => {
1063 let flatness = self.pop_number(operands)?;
1064 ContentOperation::SetFlatness(flatness)
1065 }
1066 "gs" => {
1067 let name = self.pop_name(operands)?;
1068 ContentOperation::SetGraphicsStateParams(name)
1069 }
1070
1071 "m" => {
1073 let y = self.pop_number(operands)?;
1074 let x = self.pop_number(operands)?;
1075 ContentOperation::MoveTo(x, y)
1076 }
1077 "l" => {
1078 let y = self.pop_number(operands)?;
1079 let x = self.pop_number(operands)?;
1080 ContentOperation::LineTo(x, y)
1081 }
1082 "c" => {
1083 let y3 = self.pop_number(operands)?;
1084 let x3 = self.pop_number(operands)?;
1085 let y2 = self.pop_number(operands)?;
1086 let x2 = self.pop_number(operands)?;
1087 let y1 = self.pop_number(operands)?;
1088 let x1 = self.pop_number(operands)?;
1089 ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
1090 }
1091 "v" => {
1092 let y3 = self.pop_number(operands)?;
1093 let x3 = self.pop_number(operands)?;
1094 let y2 = self.pop_number(operands)?;
1095 let x2 = self.pop_number(operands)?;
1096 ContentOperation::CurveToV(x2, y2, x3, y3)
1097 }
1098 "y" => {
1099 let y3 = self.pop_number(operands)?;
1100 let x3 = self.pop_number(operands)?;
1101 let y1 = self.pop_number(operands)?;
1102 let x1 = self.pop_number(operands)?;
1103 ContentOperation::CurveToY(x1, y1, x3, y3)
1104 }
1105 "h" => ContentOperation::ClosePath,
1106 "re" => {
1107 let height = self.pop_number(operands)?;
1108 let width = self.pop_number(operands)?;
1109 let y = self.pop_number(operands)?;
1110 let x = self.pop_number(operands)?;
1111 ContentOperation::Rectangle(x, y, width, height)
1112 }
1113
1114 "S" => ContentOperation::Stroke,
1116 "s" => ContentOperation::CloseStroke,
1117 "f" | "F" => ContentOperation::Fill,
1118 "f*" => ContentOperation::FillEvenOdd,
1119 "B" => ContentOperation::FillStroke,
1120 "B*" => ContentOperation::FillStrokeEvenOdd,
1121 "b" => ContentOperation::CloseFillStroke,
1122 "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1123 "n" => ContentOperation::EndPath,
1124
1125 "W" => ContentOperation::Clip,
1127 "W*" => ContentOperation::ClipEvenOdd,
1128
1129 "CS" => {
1131 let name = self.pop_name(operands)?;
1132 ContentOperation::SetStrokingColorSpace(name)
1133 }
1134 "cs" => {
1135 let name = self.pop_name(operands)?;
1136 ContentOperation::SetNonStrokingColorSpace(name)
1137 }
1138 "SC" | "SCN" => {
1139 let components = self.pop_color_components(operands)?;
1140 ContentOperation::SetStrokingColor(components)
1141 }
1142 "sc" | "scn" => {
1143 let components = self.pop_color_components(operands)?;
1144 ContentOperation::SetNonStrokingColor(components)
1145 }
1146 "G" => {
1147 let gray = self.pop_number(operands)?;
1148 ContentOperation::SetStrokingGray(gray)
1149 }
1150 "g" => {
1151 let gray = self.pop_number(operands)?;
1152 ContentOperation::SetNonStrokingGray(gray)
1153 }
1154 "RG" => {
1155 let b = self.pop_number(operands)?;
1156 let g = self.pop_number(operands)?;
1157 let r = self.pop_number(operands)?;
1158 ContentOperation::SetStrokingRGB(r, g, b)
1159 }
1160 "rg" => {
1161 let b = self.pop_number(operands)?;
1162 let g = self.pop_number(operands)?;
1163 let r = self.pop_number(operands)?;
1164 ContentOperation::SetNonStrokingRGB(r, g, b)
1165 }
1166 "K" => {
1167 let k = self.pop_number(operands)?;
1168 let y = self.pop_number(operands)?;
1169 let m = self.pop_number(operands)?;
1170 let c = self.pop_number(operands)?;
1171 ContentOperation::SetStrokingCMYK(c, m, y, k)
1172 }
1173 "k" => {
1174 let k = self.pop_number(operands)?;
1175 let y = self.pop_number(operands)?;
1176 let m = self.pop_number(operands)?;
1177 let c = self.pop_number(operands)?;
1178 ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1179 }
1180
1181 "sh" => {
1183 let name = self.pop_name(operands)?;
1184 ContentOperation::ShadingFill(name)
1185 }
1186
1187 "Do" => {
1189 let name = self.pop_name(operands)?;
1190 ContentOperation::PaintXObject(name)
1191 }
1192
1193 "BMC" => {
1195 let tag = self.pop_name(operands)?;
1196 ContentOperation::BeginMarkedContent(tag)
1197 }
1198 "BDC" => {
1199 let props = self.pop_dict_or_name(operands)?;
1200 let tag = self.pop_name(operands)?;
1201 ContentOperation::BeginMarkedContentWithProps(tag, props)
1202 }
1203 "EMC" => ContentOperation::EndMarkedContent,
1204 "MP" => {
1205 let tag = self.pop_name(operands)?;
1206 ContentOperation::DefineMarkedContentPoint(tag)
1207 }
1208 "DP" => {
1209 let props = self.pop_dict_or_name(operands)?;
1210 let tag = self.pop_name(operands)?;
1211 ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1212 }
1213
1214 "BX" => ContentOperation::BeginCompatibility,
1216 "EX" => ContentOperation::EndCompatibility,
1217
1218 "BI" => {
1220 operands.clear(); self.parse_inline_image()?
1222 }
1223
1224 _ => {
1225 return Err(ParseError::SyntaxError {
1226 position: self.position,
1227 message: format!("Unknown operator: {op}"),
1228 });
1229 }
1230 };
1231
1232 operands.clear(); Ok(operator)
1234 }
1235
1236 fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1238 match operands.pop() {
1239 Some(Token::Number(n)) => Ok(n),
1240 Some(Token::Integer(i)) => Ok(i as f32),
1241 _ => Err(ParseError::SyntaxError {
1242 position: self.position,
1243 message: "Expected number operand".to_string(),
1244 }),
1245 }
1246 }
1247
1248 fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1249 match operands.pop() {
1250 Some(Token::Integer(i)) => Ok(i),
1251 _ => Err(ParseError::SyntaxError {
1252 position: self.position,
1253 message: "Expected integer operand".to_string(),
1254 }),
1255 }
1256 }
1257
1258 fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1259 match operands.pop() {
1260 Some(Token::Name(n)) => Ok(n),
1261 _ => Err(ParseError::SyntaxError {
1262 position: self.position,
1263 message: "Expected name operand".to_string(),
1264 }),
1265 }
1266 }
1267
1268 fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1269 match operands.pop() {
1270 Some(Token::String(s)) => Ok(s),
1271 Some(Token::HexString(s)) => Ok(s),
1272 _ => Err(ParseError::SyntaxError {
1273 position: self.position,
1274 message: "Expected string operand".to_string(),
1275 }),
1276 }
1277 }
1278
1279 fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1280 let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
1282 if has_array_end {
1283 operands.pop(); }
1285
1286 let mut array = Vec::new();
1287 let mut found_start = false;
1288
1289 while let Some(token) = operands.pop() {
1291 match token {
1292 Token::ArrayStart => {
1293 found_start = true;
1294 break;
1295 }
1296 Token::ArrayEnd => {
1297 continue;
1299 }
1300 _ => array.push(token),
1301 }
1302 }
1303
1304 if !found_start {
1305 return Err(ParseError::SyntaxError {
1306 position: self.position,
1307 message: "Expected array".to_string(),
1308 });
1309 }
1310
1311 array.reverse(); Ok(array)
1313 }
1314
1315 fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1316 if let Some(token) = operands.pop() {
1317 match token {
1318 Token::Name(name) => {
1319 let mut props = HashMap::new();
1322 props.insert("__resource_ref".to_string(), name);
1323 Ok(props)
1324 }
1325 Token::DictEnd => {
1326 let mut props = HashMap::new();
1330
1331 while let Some(value_token) = operands.pop() {
1333 if matches!(value_token, Token::DictStart) {
1334 break;
1335 }
1336
1337 let value = match &value_token {
1341 Token::Name(name) => name.clone(),
1342 Token::String(s) => String::from_utf8_lossy(s).to_string(),
1343 Token::Integer(i) => i.to_string(),
1344 Token::Number(f) => f.to_string(),
1345 Token::ArrayEnd => {
1346 let mut array_elements = Vec::new();
1348 while let Some(arr_token) = operands.pop() {
1349 match arr_token {
1350 Token::ArrayStart => break,
1351 Token::Name(n) => array_elements.push(n),
1352 Token::String(s) => array_elements
1353 .push(String::from_utf8_lossy(&s).to_string()),
1354 Token::Integer(i) => array_elements.push(i.to_string()),
1355 Token::Number(f) => array_elements.push(f.to_string()),
1356 _ => {} }
1358 }
1359 array_elements.reverse();
1360 format!("[{}]", array_elements.join(", "))
1361 }
1362 _ => continue, };
1364
1365 if let Some(Token::Name(key)) = operands.pop() {
1367 props.insert(key, value);
1368 }
1369 }
1370
1371 Ok(props)
1372 }
1373 _ => {
1374 Ok(HashMap::new())
1376 }
1377 }
1378 } else {
1379 Err(ParseError::SyntaxError {
1381 position: 0,
1382 message: "Expected dictionary or name for marked content properties".to_string(),
1383 })
1384 }
1385 }
1386
1387 fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1388 let mut components = Vec::new();
1389
1390 while let Some(token) = operands.last() {
1392 match token {
1393 Token::Number(n) => {
1394 components.push(*n);
1395 operands.pop();
1396 }
1397 Token::Integer(i) => {
1398 components.push(*i as f32);
1399 operands.pop();
1400 }
1401 _ => break,
1402 }
1403 }
1404
1405 components.reverse();
1406 Ok(components)
1407 }
1408
1409 fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1410 let mut elements = Vec::new();
1411
1412 for token in tokens {
1413 match token {
1414 Token::String(s) | Token::HexString(s) => {
1415 elements.push(TextElement::Text(s));
1416 }
1417 Token::Number(n) => {
1418 elements.push(TextElement::Spacing(n));
1419 }
1420 Token::Integer(i) => {
1421 elements.push(TextElement::Spacing(i as f32));
1422 }
1423 _ => {
1424 return Err(ParseError::SyntaxError {
1425 position: self.position,
1426 message: "Invalid element in text array".to_string(),
1427 });
1428 }
1429 }
1430 }
1431
1432 Ok(elements)
1433 }
1434
1435 fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1436 let mut pattern = Vec::new();
1437
1438 for token in tokens {
1439 match token {
1440 Token::Number(n) => pattern.push(n),
1441 Token::Integer(i) => pattern.push(i as f32),
1442 _ => {
1443 return Err(ParseError::SyntaxError {
1444 position: self.position,
1445 message: "Invalid element in dash array".to_string(),
1446 });
1447 }
1448 }
1449 }
1450
1451 Ok(pattern)
1452 }
1453
1454 fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1455 let mut params = HashMap::new();
1457
1458 while self.position < self.tokens.len() {
1459 if let Token::Operator(op) = &self.tokens[self.position] {
1461 if op == "ID" {
1462 self.position += 1;
1463 break;
1464 }
1465 }
1466
1467 if let Token::Name(key) = &self.tokens[self.position] {
1472 self.position += 1;
1473 if self.position >= self.tokens.len() {
1474 break;
1475 }
1476
1477 let value = match &self.tokens[self.position] {
1479 Token::Integer(n) => Object::Integer(*n as i64),
1480 Token::Number(n) => Object::Real(*n as f64),
1481 Token::Name(s) => Object::Name(expand_inline_name(s)),
1482 Token::String(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1483 Token::HexString(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1484 _ => Object::Null,
1485 };
1486
1487 let full_key = expand_inline_key(key);
1489 params.insert(full_key, value);
1490 self.position += 1;
1491 } else {
1492 self.position += 1;
1493 }
1494 }
1495
1496 let data = if self.position < self.tokens.len() {
1499 if let Token::InlineImageData(bytes) = &self.tokens[self.position] {
1500 let d = bytes.clone();
1501 self.position += 1;
1502 d
1503 } else {
1504 self.collect_inline_image_data_from_tokens()?
1506 }
1507 } else {
1508 Vec::new()
1509 };
1510
1511 Ok(ContentOperation::InlineImage { params, data })
1512 }
1513
1514 fn collect_inline_image_data_from_tokens(&mut self) -> ParseResult<Vec<u8>> {
1517 let mut data = Vec::new();
1518 while self.position < self.tokens.len() {
1519 if let Token::Operator(op) = &self.tokens[self.position] {
1520 if op == "EI" {
1521 self.position += 1;
1522 break;
1523 }
1524 }
1525 match &self.tokens[self.position] {
1526 Token::String(bytes) | Token::HexString(bytes) => {
1527 data.extend_from_slice(bytes);
1528 }
1529 Token::Integer(n) => data.extend_from_slice(n.to_string().as_bytes()),
1530 Token::Number(n) => data.extend_from_slice(n.to_string().as_bytes()),
1531 Token::Name(s) | Token::Operator(s) => data.extend_from_slice(s.as_bytes()),
1532 _ => {}
1533 }
1534 self.position += 1;
1535 }
1536 Ok(data)
1537 }
1538}
1539
/// Maps an abbreviated inline-image dictionary key to its full name.
/// Keys without a known abbreviation are returned unchanged.
fn expand_inline_key(key: &str) -> String {
    let full = match key {
        "W" => "Width",
        "H" => "Height",
        "CS" => "ColorSpace",
        "BPC" => "BitsPerComponent",
        "F" => "Filter",
        "DP" => "DecodeParms",
        "IM" => "ImageMask",
        "I" => "Interpolate",
        "D" => "Decode",
        // Already-long names (e.g. "ColorSpace", "Intent") pass through.
        other => other,
    };
    full.to_string()
}
1556
/// Maps an abbreviated inline-image name value (colour space or filter) to
/// its full name. Unrecognised names are returned unchanged.
fn expand_inline_name(name: &str) -> String {
    let full = match name {
        // Colour spaces
        "G" => "DeviceGray",
        "RGB" => "DeviceRGB",
        "CMYK" => "DeviceCMYK",
        "I" => "Indexed",
        // Filters
        "AHx" => "ASCIIHexDecode",
        "A85" => "ASCII85Decode",
        "LZW" => "LZWDecode",
        "Fl" => "FlateDecode",
        "RL" => "RunLengthDecode",
        "DCT" => "DCTDecode",
        "CCF" => "CCITTFaxDecode",
        other => other,
    };
    full.to_string()
}
1574
1575#[cfg(test)]
1576mod tests {
1577 use super::*;
1578
/// Integers and reals, with and without sign or leading digit, tokenize to
/// the expected numeric tokens, followed by end of input.
#[test]
fn test_tokenize_numbers() {
    let mut tokenizer = ContentTokenizer::new(b"123 -45 3.14159 -0.5 .5");

    let expected = [
        Token::Integer(123),
        Token::Integer(-45),
        Token::Number(3.14159),
        Token::Number(-0.5),
        Token::Number(0.5),
    ];
    for want in expected {
        assert_eq!(tokenizer.next_token().unwrap(), Some(want));
    }
    assert_eq!(tokenizer.next_token().unwrap(), None);
}
1594
/// Literal strings: plain text, an escaped newline, and balanced nested
/// parentheses.
#[test]
fn test_tokenize_strings() {
    let mut tokenizer =
        ContentTokenizer::new(b"(Hello World) (Hello\\nWorld) (Nested (paren))");

    let expected: [&[u8]; 3] = [b"Hello World", b"Hello\nWorld", b"Nested (paren)"];
    for want in expected {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(want.to_vec()))
        );
    }
}
1613
/// Hex strings decode to the same bytes with or without embedded whitespace.
#[test]
fn test_tokenize_hex_strings() {
    let mut tokenizer = ContentTokenizer::new(b"<48656C6C6F> <48 65 6C 6C 6F>");

    for _ in 0..2 {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::HexString(b"Hello".to_vec()))
        );
    }
}
1628
#[test]
/// Names are returned without the leading slash and with `#xx` escapes decoded.
fn test_tokenize_names() {
    let mut lexer = ContentTokenizer::new(b"/Name /Name#20with#20spaces /A#42C");
    let expected = ["Name", "Name with spaces", "ABC"];

    for name in expected.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::Name((*name).to_string()))
        );
    }
}
1647
#[test]
/// Bare keywords are emitted as `Token::Operator` in input order.
fn test_tokenize_operators() {
    let mut lexer = ContentTokenizer::new(b"BT Tj ET q Q");
    let expected = ["BT", "Tj", "ET", "q", "Q"];

    for keyword in expected.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::Operator((*keyword).to_string()))
        );
    }
}
1674
#[test]
/// A minimal text object parses into exactly five operations.
fn test_parse_text_operators() {
    let parsed = ContentParser::parse(b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET").unwrap();

    // Comparing whole vectors checks both the count and every element.
    let expected = vec![
        ContentOperation::BeginText,
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::MoveText(100.0, 200.0),
        ContentOperation::ShowText(b"Hello World".to_vec()),
        ContentOperation::EndText,
    ];
    assert_eq!(parsed, expected);
}
1693
#[test]
/// Save/restore, `cm`, line width, rectangle and stroke parse into six operations.
fn test_parse_graphics_operators() {
    let parsed = ContentParser::parse(b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q").unwrap();

    // Comparing whole vectors checks both the count and every element.
    let expected = vec![
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
        ContentOperation::SetLineWidth(2.0),
        ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0),
        ContentOperation::Stroke,
        ContentOperation::RestoreGraphicsState,
    ];
    assert_eq!(parsed, expected);
}
1713
#[test]
/// The non-stroking gray, RGB and CMYK operators each consume their own operands.
fn test_parse_color_operators() {
    let parsed = ContentParser::parse(b"0.5 g 1 0 0 rg 0 0 0 1 k").unwrap();

    // Comparing whole vectors checks both the count and every element.
    let expected = vec![
        ContentOperation::SetNonStrokingGray(0.5),
        ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0),
    ];
    assert_eq!(parsed, expected);
}
1730
1731 mod comprehensive_tests {
1733 use super::*;
1734
#[test]
/// Walks the text-state and text-positioning operators inside one text object.
fn test_all_text_operators() {
    let stream = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::BeginText,
        ContentOperation::SetCharSpacing(5.0),
        ContentOperation::SetWordSpacing(10.0),
        ContentOperation::SetHorizontalScaling(120.0),
        ContentOperation::SetLeading(15.0),
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::SetTextRenderMode(1),
        ContentOperation::SetTextRise(5.0),
        ContentOperation::MoveText(100.0, 200.0),
        ContentOperation::MoveTextSetLeading(50.0, 150.0),
        ContentOperation::NextLine,
        ContentOperation::ShowText(b"Hello".to_vec()),
        ContentOperation::EndText,
    ];
    // Indexed comparison: only the listed positions are checked.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1761
#[test]
/// Walks the graphics-state operators: q, Q, cm, w, J, j, M, gs, i and ri.
fn test_all_graphics_state_operators() {
    let stream = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
        ContentOperation::SetLineWidth(2.0),
        ContentOperation::SetLineCap(1),
        ContentOperation::SetLineJoin(2),
        ContentOperation::SetMiterLimit(10.0),
        ContentOperation::SetGraphicsStateParams("GS1".to_string()),
        ContentOperation::SetFlatness(0.5),
        ContentOperation::SetIntent("Perceptual".to_string()),
    ];
    // Indexed comparison: only the listed positions are checked.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1788
#[test]
/// Walks the path-construction operators: m, l, c, v, y, h and re.
fn test_all_path_construction_operators() {
    let stream = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0),
        ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0),
        ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0),
        ContentOperation::ClosePath,
        ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0),
    ];
    // Indexed comparison: only the listed positions are checked.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1814
#[test]
/// Walks the path-painting and clipping operators; `f` and `F` are both expected
/// to yield `Fill`.
fn test_all_path_painting_operators() {
    let parsed = ContentParser::parse(b"S s f F f* B B* b b* n W W*").unwrap();

    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
        ContentOperation::EndPath,
        ContentOperation::Clip,
        ContentOperation::ClipEvenOdd,
    ];
    // Indexed comparison: only the listed positions are checked.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1833
#[test]
/// Walks the colour-space, gray, RGB, CMYK and shading operators.
fn test_all_color_operators() {
    let stream = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string()),
        ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string()),
        ContentOperation::SetStrokingGray(0.7),
        ContentOperation::SetNonStrokingGray(0.4),
        ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0),
        ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0),
        ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5),
        ContentOperation::ShadingFill("Shade1".to_string()),
    ];
    // Indexed comparison: only the listed positions are checked.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1871
#[test]
/// Walks the XObject, marked-content and compatibility operators.
fn test_xobject_and_marked_content_operators() {
    let parsed = ContentParser::parse(b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX").unwrap();

    let expected = [
        ContentOperation::PaintXObject("Image1".to_string()),
        ContentOperation::BeginMarkedContent("MC1".to_string()),
        ContentOperation::EndMarkedContent,
        ContentOperation::DefineMarkedContentPoint("MP1".to_string()),
        ContentOperation::BeginCompatibility,
        ContentOperation::EndCompatibility,
    ];
    // Indexed comparison: only the listed positions are checked.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1894
#[test]
/// A text object nested inside a saved, transformed graphics state parses into
/// eight operations.
fn test_complex_content_stream() {
    let stream = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
    let parsed = ContentParser::parse(stream).unwrap();

    // Comparing whole vectors checks both the count and every element.
    let expected = vec![
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0),
        ContentOperation::BeginText,
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::MoveText(0.0, 0.0),
        ContentOperation::ShowText(b"Complex".to_vec()),
        ContentOperation::EndText,
        ContentOperation::RestoreGraphicsState,
    ];
    assert_eq!(parsed, expected);
}
1919
#[test]
/// Spaces, tabs, CR and LF between tokens are skipped, including at both ends.
fn test_tokenizer_whitespace_handling() {
    let mut lexer = ContentTokenizer::new(b" \t\n\r BT \t\n /F1 12.5 \t Tf \n\r ET ");
    let expected = vec![
        Token::Operator("BT".to_string()),
        Token::Name("F1".to_string()),
        Token::Number(12.5),
        Token::Operator("Tf".to_string()),
        Token::Operator("ET".to_string()),
    ];

    for want in expected {
        assert_eq!(lexer.next_token().unwrap(), Some(want));
    }
    // Trailing whitespace must not produce a further token.
    assert_eq!(lexer.next_token().unwrap(), None);
}
1944
#[test]
/// Unusual number spellings: leading dot, explicit sign, trailing dot.
fn test_tokenizer_edge_cases() {
    let mut lexer = ContentTokenizer::new(b"0 .5 -.5 +.5 123. .123 1.23 -1.23");
    let expected = vec![
        Token::Integer(0),
        Token::Number(0.5),
        Token::Number(-0.5),
        Token::Number(0.5),
        Token::Number(123.0),
        Token::Number(0.123),
        Token::Number(1.23),
        Token::Number(-1.23),
    ];

    for want in expected {
        assert_eq!(lexer.next_token().unwrap(), Some(want));
    }
}
1960
#[test]
/// Backslash escapes inside literal strings. Only the first ten strings of the
/// input are checked; the two trailing octal samples are not asserted.
fn test_string_parsing_edge_cases() {
    let input = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
    let mut lexer = ContentTokenizer::new(input);

    let expected: [&[u8]; 10] = [
        b"Simple",
        b"With\\backslash",
        b"With)paren",
        b"With\newline",
        b"With\ttab",
        b"With\rcarriage",
        b"With\x08backspace",
        b"With\x0Cformfeed",
        b"With(leftparen",
        b"With)rightparen",
    ];
    for text in expected.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::String(text.to_vec()))
        );
    }
}
2007
#[test]
/// Hex strings, including one with an odd digit count whose last digit is
/// expected to decode as `0x50`.
fn test_hex_string_parsing() {
    let mut lexer =
        ContentTokenizer::new(b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>");

    let expected: [&[u8]; 4] = [b"Hello", b"Hello", b"HelloW", b"Hello\x50"];
    for bytes in expected.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::HexString(bytes.to_vec()))
        );
    }
}
2030
#[test]
/// `#xx` escapes in names decode to space, `#`, `/`, and a leading letter.
fn test_name_parsing_edge_cases() {
    let input =
        b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
    let mut lexer = ContentTokenizer::new(input);

    let expected = [
        "Name",
        "Name with spaces",
        "Name#with#hash",
        "Name/with/slash",
        "EmptyName",
    ];
    for name in expected.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::Name((*name).to_string()))
        );
    }
}
2057
#[test]
/// Repeated and unbalanced-looking operator runs are reported one-for-one.
fn test_operator_parsing_edge_cases() {
    let parsed = ContentParser::parse(b"q q q Q Q Q BT BT ET ET").unwrap();

    // Three saves, three restores, two text begins, two text ends: ten in total.
    let expected = [
        vec![ContentOperation::SaveGraphicsState; 3],
        vec![ContentOperation::RestoreGraphicsState; 3],
        vec![ContentOperation::BeginText; 2],
        vec![ContentOperation::EndText; 2],
    ]
    .concat();
    assert_eq!(parsed, expected);
}
2075
#[test]
/// `Td` given a single operand is expected to be rejected.
fn test_error_handling_insufficient_operands() {
    let outcome = ContentParser::parse(b"100 Td");
    assert!(outcome.is_err());
}
2082
#[test]
/// An unrecognised operator keyword is expected to be rejected.
fn test_error_handling_invalid_operator() {
    let outcome = ContentParser::parse(b"100 200 INVALID");
    assert!(outcome.is_err());
}
2089
#[test]
/// An unterminated literal string must not panic the tokenizer.
///
/// The tokenizer may either report an error or tolerate the missing `)`.
/// The previous assertion (`is_ok() || is_err()`) was always true and verified
/// nothing, so it is replaced by a real check on the tolerated case.
fn test_error_handling_malformed_string() {
    let mut tokenizer = ContentTokenizer::new(b"(Unclosed string");

    // If the input is accepted, the token must still be a literal string;
    // an error or end-of-input result is also acceptable.
    if let Ok(Some(token)) = tokenizer.next_token() {
        assert!(
            matches!(token, Token::String(_)),
            "unterminated literal produced a non-string token: {:?}",
            token
        );
    }
}
2100
#[test]
/// A non-hex digit (`G`) inside a hex string is expected to be an error.
fn test_error_handling_malformed_hex_string() {
    let mut lexer = ContentTokenizer::new(b"<48656C6C6G>");
    assert!(lexer.next_token().is_err());
}
2108
#[test]
/// A `#` escape followed by non-hex digits in a name is expected to be an error.
fn test_error_handling_malformed_name() {
    let mut lexer = ContentTokenizer::new(b"/Name#GG");
    assert!(lexer.next_token().is_err());
}
2116
#[test]
/// An empty stream parses successfully into no operations.
fn test_empty_content_stream() {
    let parsed = ContentParser::parse(b"").unwrap();
    assert!(parsed.is_empty());
}
2123
#[test]
/// A stream of nothing but whitespace parses successfully into no operations.
fn test_whitespace_only_content_stream() {
    let parsed = ContentParser::parse(b" \t\n\r ").unwrap();
    assert!(parsed.is_empty());
}
2130
#[test]
/// Integer operands are delivered to `m` and `l` as floating-point coordinates.
fn test_mixed_integer_and_real_operands() {
    let parsed = ContentParser::parse(b"100 200 m 150 200 l").unwrap();

    let expected = vec![
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
    ];
    assert_eq!(parsed, expected);
}
2141
#[test]
/// Negative integer and real operands keep their sign through parsing.
fn test_negative_operands() {
    let parsed = ContentParser::parse(b"-100 -200 Td -50.5 -75.2 TD").unwrap();

    let expected = vec![
        ContentOperation::MoveText(-100.0, -200.0),
        ContentOperation::MoveTextSetLeading(-50.5, -75.2),
    ];
    assert_eq!(parsed, expected);
}
2154
#[test]
/// Six-digit reals with six fractional digits parse to the same `f32` values
/// as the equivalent Rust literals.
fn test_large_numbers() {
    let parsed = ContentParser::parse(b"999999.999999 -999999.999999 m").unwrap();

    let expected = vec![ContentOperation::MoveTo(999999.999999, -999999.999999)];
    assert_eq!(parsed, expected);
}
2166
#[test]
/// Despite the name, the input holds only plain decimal reals; no exponent
/// notation is exercised here.
fn test_scientific_notation() {
    let parsed = ContentParser::parse(b"123.45 -456.78 m").unwrap();

    let expected = vec![ContentOperation::MoveTo(123.45, -456.78)];
    assert_eq!(parsed, expected);
}
2176
#[test]
/// `TJ` supplied with a plain string operand is expected to fail.
fn test_show_text_array_complex() {
    let outcome = ContentParser::parse(b"(Hello) TJ");
    assert!(outcome.is_err());
}
2185
#[test]
/// `d` supplied with only a single integer operand is expected to fail.
fn test_dash_pattern_empty() {
    let outcome = ContentParser::parse(b"0 d");
    assert!(outcome.is_err());
}
2194
#[test]
/// `d` supplied with only a single real operand is expected to fail.
fn test_dash_pattern_complex() {
    let outcome = ContentParser::parse(b"2.5 d");
    assert!(outcome.is_err());
}
2203
#[test]
/// `pop_array` drains the whole operand stack and returns only the elements,
/// both with and without a trailing `ArrayEnd` token.
fn test_pop_array_removes_array_end() {
    let parser = ContentParser::new(b"");

    // (operand stack, number of elements expected back)
    let cases: Vec<(Vec<Token>, usize)> = vec![
        (
            vec![
                Token::ArrayStart,
                Token::Integer(1),
                Token::Integer(2),
                Token::Integer(3),
                Token::ArrayEnd,
            ],
            3,
        ),
        (
            vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)],
            2,
        ),
    ];

    for (mut stack, want_len) in cases {
        let items = parser.pop_array(&mut stack).unwrap();
        assert_eq!(items.len(), want_len);
        assert!(stack.is_empty());
    }
}
2227
#[test]
/// `parse_dash_array` converts numeric tokens to floats and accepts an empty list.
fn test_dash_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    let mixed = parser
        .parse_dash_array(vec![Token::Number(3.0), Token::Integer(2)])
        .unwrap();
    assert_eq!(mixed, vec![3.0, 2.0]);

    let none = parser.parse_dash_array(vec![]).unwrap();
    assert_eq!(none, Vec::<f32>::new());
}
2244
#[test]
/// `parse_text_array` yields one element per input token for strings and numbers.
fn test_text_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    let tokens = vec![
        Token::String(b"Hello".to_vec()),
        Token::Number(-100.0),
        Token::String(b"World".to_vec()),
    ];
    let elements = parser.parse_text_array(tokens).unwrap();
    assert_eq!(elements.len(), 3);
}
2259
#[test]
/// A `BI … ID … EI` sequence becomes one `InlineImage` whose abbreviated keys
/// and colour-space name are expanded to their full forms.
fn test_inline_image_handling() {
    let stream = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 1);

    if let ContentOperation::InlineImage { params, .. } = &parsed[0] {
        let expected = [
            ("Width", Object::Integer(100)),
            ("Height", Object::Integer(100)),
            ("BitsPerComponent", Object::Integer(8)),
            ("ColorSpace", Object::Name("DeviceRGB".to_string())),
        ];
        for (key, want) in expected.iter() {
            assert_eq!(params.get(*key), Some(want));
        }
    } else {
        panic!("Expected InlineImage operation");
    }
}
2281
#[test]
/// Inline image with a filter: the `/F /AHx` pair is expected to surface as
/// `Filter = ASCIIHexDecode`, alongside the expanded size and colour entries.
fn test_inline_image_with_filter() {
    let stream = b"BI /W 50 /H 50 /CS /G /BPC 1 /F /AHx ID 00FF00FF EI";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 1);

    if let ContentOperation::InlineImage { params, .. } = &parsed[0] {
        let expected = [
            ("Width", Object::Integer(50)),
            ("Height", Object::Integer(50)),
            ("ColorSpace", Object::Name("DeviceGray".to_string())),
            ("BitsPerComponent", Object::Integer(1)),
            ("Filter", Object::Name("ASCIIHexDecode".to_string())),
        ];
        for (key, want) in expected.iter() {
            assert_eq!(params.get(*key), Some(want));
        }
    } else {
        panic!("Expected InlineImage operation");
    }
}
2305
#[test]
/// Parses 1000 `m` operations and requires the parse to finish in under 100 ms.
/// NOTE(review): a wall-clock bound may be sensitive to machine load.
fn test_content_parser_performance() {
    // Builds "0 1 m 1 2 m ... 999 1000 m ".
    let stream: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i + 1).into_bytes())
        .collect();

    let timer = std::time::Instant::now();
    let parsed = ContentParser::parse(&stream).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(parsed.len(), 1000);
    assert!(elapsed.as_millis() < 100);
}
2320
#[test]
/// Tokenizes 2000 integers and requires the run to finish in under 50 ms.
/// NOTE(review): a wall-clock bound may be sensitive to machine load.
fn test_tokenizer_performance() {
    // Builds "0 1 1 2 ... 999 1000 ": two integers per iteration.
    let input: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} ", i, i + 1).into_bytes())
        .collect();

    let timer = std::time::Instant::now();
    let mut lexer = ContentTokenizer::new(&input);
    // Pull tokens until the tokenizer reports end of input.
    let seen = std::iter::from_fn(|| lexer.next_token().unwrap()).count();
    let elapsed = timer.elapsed();

    assert_eq!(seen, 2000);
    assert!(elapsed.as_millis() < 50);
}
2339
#[test]
/// Parses a stream of 10 000 `c` operations and checks that every one of them
/// comes back as `CurveTo`.
///
/// The variant check used to be a bare `matches!(…)` statement whose result was
/// thrown away, so a wrong variant could never fail the test. It is now asserted.
fn test_memory_usage_large_content() {
    let mut content = Vec::new();
    for i in 0..10000 {
        content.extend_from_slice(
            format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5).as_bytes(),
        );
    }

    let operators = ContentParser::parse(&content).unwrap();
    assert_eq!(operators.len(), 10000);

    for (idx, op) in operators.iter().enumerate() {
        assert!(
            matches!(op, ContentOperation::CurveTo(..)),
            "operation {} should be CurveTo, got {:?}",
            idx,
            op
        );
    }
}
2358
#[test]
/// Ten threads parse the same shared buffer and each must see the same five
/// operations, bracketed by `BeginText` and `EndText`.
fn test_concurrent_parsing() {
    use std::sync::Arc;
    use std::thread;

    let shared = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());

    // Start every worker before joining any of them.
    let mut workers = Vec::with_capacity(10);
    for _ in 0..10 {
        let stream = Arc::clone(&shared);
        workers.push(thread::spawn(move || {
            ContentParser::parse(&stream).unwrap()
        }));
    }

    for worker in workers {
        let parsed = worker.join().unwrap();
        assert_eq!(parsed.len(), 5);
        assert_eq!(parsed[0], ContentOperation::BeginText);
        assert_eq!(parsed[4], ContentOperation::EndText);
    }
}
2379
#[test]
/// Hex string corner cases: empty, odd digit count, and embedded whitespace.
fn test_tokenizer_hex_string_edge_cases() {
    // `<>` decodes to no bytes at all.
    let mut lexer = ContentTokenizer::new(b"<>");
    if let Token::HexString(bytes) = lexer.next_token().unwrap().unwrap() {
        assert!(bytes.is_empty());
    } else {
        panic!("Expected empty hex string");
    }

    // With an odd digit count the final digit is expected to become `0x30`.
    let mut lexer = ContentTokenizer::new(b"<123>");
    if let Token::HexString(bytes) = lexer.next_token().unwrap().unwrap() {
        assert_eq!(bytes, vec![0x12, 0x30]);
    } else {
        panic!("Expected hex string with odd digits");
    }

    // Space, tab and newline between digit pairs are ignored.
    let mut lexer = ContentTokenizer::new(b"<12 34\t56\n78>");
    if let Token::HexString(bytes) = lexer.next_token().unwrap().unwrap() {
        assert_eq!(bytes, vec![0x12, 0x34, 0x56, 0x78]);
    } else {
        panic!("Expected hex string with whitespace");
    }
}
2407
#[test]
/// Single-character escapes and three-digit octal escapes in literal strings.
fn test_tokenizer_literal_string_escape_sequences() {
    // \n \r \t \b \f \( \) \\ in that order.
    let mut lexer = ContentTokenizer::new(b"(\\n\\r\\t\\b\\f\\(\\)\\\\)");
    if let Token::String(bytes) = lexer.next_token().unwrap().unwrap() {
        assert_eq!(
            bytes,
            vec![b'\n', b'\r', b'\t', 0x08, 0x0C, b'(', b')', b'\\']
        );
    } else {
        panic!("Expected string with escapes");
    }

    // Octal 101, 040 and 377 are 'A', space and byte 255.
    let mut lexer = ContentTokenizer::new(b"(\\101\\040\\377)");
    if let Token::String(bytes) = lexer.next_token().unwrap().unwrap() {
        assert_eq!(bytes, vec![b'A', b' ', 255]);
    } else {
        panic!("Expected string with octal escapes");
    }
}
2431
#[test]
/// Balanced parentheses inside a literal string are kept verbatim, at one and
/// at several levels of nesting.
fn test_tokenizer_nested_parentheses() {
    let mut lexer = ContentTokenizer::new(b"(outer (inner) text)");
    if let Token::String(bytes) = lexer.next_token().unwrap().unwrap() {
        assert_eq!(bytes, b"outer (inner) text");
    } else {
        panic!("Expected string with nested parentheses");
    }

    let mut lexer = ContentTokenizer::new(b"(level1 (level2 (level3) back2) back1)");
    if let Token::String(bytes) = lexer.next_token().unwrap().unwrap() {
        assert_eq!(bytes, b"level1 (level2 (level3) back2) back1");
    } else {
        panic!("Expected string with deep nesting");
    }
}
2453
#[test]
/// `#xx` escapes in names decode to spaces and to delimiter characters.
fn test_tokenizer_name_hex_escapes() {
    let mut lexer = ContentTokenizer::new(b"/Name#20With#20Spaces");
    if let Token::Name(decoded) = lexer.next_token().unwrap().unwrap() {
        assert_eq!(decoded, "Name With Spaces");
    } else {
        panic!("Expected name with hex escapes");
    }

    // 2F 28 29 3C 3E are '/', '(', ')', '<' and '>'.
    let mut lexer = ContentTokenizer::new(b"/Special#2F#28#29#3C#3E");
    if let Token::Name(decoded) = lexer.next_token().unwrap().unwrap() {
        assert_eq!(decoded, "Special/()<>");
    } else {
        panic!("Expected name with special character escapes");
    }
}
2471
#[test]
/// Number corner cases: the largest 32-bit signed integer, a very small real,
/// and a real written with a bare leading dot.
fn test_tokenizer_number_edge_cases() {
    let mut lexer = ContentTokenizer::new(b"2147483647");
    if let Token::Integer(value) = lexer.next_token().unwrap().unwrap() {
        assert_eq!(value, 2147483647);
    } else {
        panic!("Expected large integer");
    }

    let mut lexer = ContentTokenizer::new(b"0.00001");
    if let Token::Number(value) = lexer.next_token().unwrap().unwrap() {
        assert!((value - 0.00001).abs() < f32::EPSILON);
    } else {
        panic!("Expected small float");
    }

    let mut lexer = ContentTokenizer::new(b".5");
    if let Token::Number(value) = lexer.next_token().unwrap().unwrap() {
        assert!((value - 0.5).abs() < f32::EPSILON);
    } else {
        panic!("Expected float starting with dot");
    }
}
2498
#[test]
/// A closed, filled four-point path parses into six operations.
fn test_parser_complex_path_operations() {
    let parsed = ContentParser::parse(b"100 200 m 150 200 l 150 250 l 100 250 l h f").unwrap();

    // Comparing whole vectors checks both the count and every element.
    let expected = vec![
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
        ContentOperation::LineTo(100.0, 250.0),
        ContentOperation::ClosePath,
        ContentOperation::Fill,
    ];
    assert_eq!(parsed, expected);
}
2512
#[test]
/// A single `c` operator yields one `CurveTo` whose coordinates are all finite
/// and whose first control point lies within the 50..=200 range of the input.
fn test_parser_bezier_curves() {
    let parsed = ContentParser::parse(b"100 100 150 50 200 150 c").unwrap();
    assert_eq!(parsed.len(), 1);

    if let ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3) = &parsed[0] {
        // Pairs are checked in order: first control, second control, end point.
        let points = [(x1, y1), (x2, y2), (x3, y3)];
        for (x, y) in points.iter() {
            assert!(x.is_finite() && y.is_finite());
        }
        assert!(*x1 >= 50.0 && *x1 <= 200.0);
        assert!(*y1 >= 50.0 && *y1 <= 200.0);
    } else {
        panic!("Expected CurveTo operation");
    }
}
2534
#[test]
/// Five colour operators parse into five operations; only the first two are
/// inspected in detail.
fn test_parser_color_operations() {
    let stream = b"0.5 g 1 0 0 rg 0 1 0 1 k /DeviceRGB cs 0.2 0.4 0.6 sc";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 5);

    if let ContentOperation::SetNonStrokingGray(level) = &parsed[0] {
        assert_eq!(*level, 0.5);
    } else {
        panic!("Expected SetNonStrokingGray");
    }

    if let ContentOperation::SetNonStrokingRGB(red, green, blue) = &parsed[1] {
        assert_eq!((*red, *green, *blue), (1.0, 0.0, 0.0));
    } else {
        panic!("Expected SetNonStrokingRGB");
    }
}
2552
#[test]
/// A text object using `Tm`, `TL` and the `'` operator parses into seven
/// operations; the text matrix and the bracketing operators are inspected.
fn test_parser_text_positioning_advanced() {
    let stream = b"BT 1 0 0 1 100 200 Tm 0 TL 10 TL (Line 1) ' (Line 2) ' ET";
    let parsed = ContentParser::parse(stream).unwrap();

    assert_eq!(parsed.len(), 7);
    assert_eq!(parsed[0], ContentOperation::BeginText);

    if let ContentOperation::SetTextMatrix(a, b, c, d, e, f) = &parsed[1] {
        let matrix = (*a, *b, *c, *d, *e, *f);
        assert_eq!(matrix, (1.0, 0.0, 0.0, 1.0, 100.0, 200.0));
    } else {
        panic!("Expected SetTextMatrix");
    }

    assert_eq!(parsed[6], ContentOperation::EndText);
}
2568
#[test]
/// A saved state containing `cm` and four line-style operators parses into
/// seven operations; the matrix and the bracketing operators are inspected.
fn test_parser_graphics_state_operations() {
    let parsed = ContentParser::parse(b"q 2 0 0 2 100 100 cm 5 w 1 J 2 j 10 M Q").unwrap();

    assert_eq!(parsed.len(), 7);
    assert_eq!(parsed[0], ContentOperation::SaveGraphicsState);

    if let ContentOperation::SetTransformMatrix(a, b, c, d, e, f) = &parsed[1] {
        let matrix = (*a, *b, *c, *d, *e, *f);
        assert_eq!(matrix, (2.0, 0.0, 0.0, 2.0, 100.0, 100.0));
    } else {
        panic!("Expected SetTransformMatrix");
    }

    assert_eq!(parsed[6], ContentOperation::RestoreGraphicsState);
}
2584
#[test]
/// Three consecutive `Do` operators each carry their own resource name.
fn test_parser_xobject_operations() {
    let parsed = ContentParser::parse(b"/Image1 Do /Form2 Do /Pattern3 Do").unwrap();
    assert_eq!(parsed.len(), 3);

    // The length was checked above, so the zip visits all three operations.
    let names = ["Image1", "Form2", "Pattern3"];
    for (op, want) in parsed.iter().zip(names.iter()) {
        if let ContentOperation::PaintXObject(name) = op {
            assert_eq!(name, want);
        } else {
            panic!("Expected PaintXObject");
        }
    }
}
2598
#[test]
/// A `BMC … EMC` pair wrapping a `Tj` parses into three operations with the
/// tag preserved.
fn test_parser_marked_content_operations() {
    let parsed = ContentParser::parse(b"/P BMC (Tagged content) Tj EMC").unwrap();
    assert_eq!(parsed.len(), 3);

    if let ContentOperation::BeginMarkedContent(tag) = &parsed[0] {
        assert_eq!(tag, "P");
    } else {
        panic!("Expected BeginMarkedContent");
    }
    assert_eq!(parsed[2], ContentOperation::EndMarkedContent);
}
2611
#[test]
/// Three malformed-looking streams: two are expected to fail, while operands
/// that are never followed by an operator are expected to be accepted.
fn test_parser_error_handling_invalid_operators() {
    // An operator with none of its operands.
    assert!(ContentParser::parse(b"m").is_err());

    // A hex string that is never closed.
    assert!(ContentParser::parse(b"<ABC DEF BT").is_err());

    // Trailing operands with no operator.
    assert!(ContentParser::parse(b"100 200 300").is_ok());
}
2629
#[test]
/// Mixed whitespace around operands and the operator does not affect parsing.
fn test_parser_whitespace_tolerance() {
    let parsed = ContentParser::parse(b" \n\t 100 \r\n 200 \t m \n").unwrap();

    let expected = vec![ContentOperation::MoveTo(100.0, 200.0)];
    assert_eq!(parsed, expected);
}
2638
/// `%` comments (mid-stream and trailing) must be skipped without disturbing the operands.
#[test]
fn test_tokenizer_comment_handling() {
    let parsed =
        ContentParser::parse(b"100 % This is a comment\n200 m % Another comment").unwrap();

    // Only the MoveTo survives; both comments contribute nothing.
    assert_eq!(parsed, vec![ContentOperation::MoveTo(100.0, 200.0)]);
}
2647
/// A non-ASCII byte (0xFF) inside a comment must not stop the operators around it from parsing.
#[test]
fn test_parser_stream_with_binary_data() {
    let stream = b"100 200 m % Comment with \xFF binary\n150 250 l";

    let parsed = ContentParser::parse(stream).unwrap();
    let expected = vec![
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
    ];
    assert_eq!(parsed, expected);
}
2658
/// Parses a MoveTo followed by a LineTo and checks both operations and their order.
#[test]
fn test_tokenizer_array_parsing() {
    let parsed = ContentParser::parse(b"100 200 m 150 250 l").unwrap();

    let mut remaining = parsed.iter();
    assert_eq!(
        remaining.next(),
        Some(&ContentOperation::MoveTo(100.0, 200.0))
    );
    assert_eq!(
        remaining.next(),
        Some(&ContentOperation::LineTo(150.0, 250.0))
    );
    // Nothing may follow the two path operations.
    assert!(remaining.next().is_none());
}
2669
/// Two consecutive `re` operators should each produce a `Rectangle` with its four operands.
#[test]
fn test_parser_rectangle_operations() {
    let parsed = ContentParser::parse(b"10 20 100 50 re 0 0 200 300 re").unwrap();

    // Expected (x, y, width, height) for each rectangle, in stream order.
    let expected: [(f32, f32, f32, f32); 2] =
        [(10.0, 20.0, 100.0, 50.0), (0.0, 0.0, 200.0, 300.0)];

    assert_eq!(parsed.len(), 2);
    for (operation, want) in parsed.iter().zip(expected.iter()) {
        if let ContentOperation::Rectangle(x, y, width, height) = operation {
            assert_eq!((*x, *y, *width, *height), *want);
        } else {
            panic!("Expected Rectangle operation");
        }
    }
}
2689
/// `W` and `W*` should map to `Clip` and `ClipEvenOdd`, each followed by `n` as `EndPath`.
#[test]
fn test_parser_clipping_operations() {
    let parsed =
        ContentParser::parse(b"100 100 50 50 re W n 200 200 75 75 re W* n").unwrap();
    assert_eq!(parsed.len(), 6);

    // Positions 0 and 3 hold the rectangles and are not inspected here.
    let expectations = [
        (1, ContentOperation::Clip),
        (2, ContentOperation::EndPath),
        (4, ContentOperation::ClipEvenOdd),
        (5, ContentOperation::EndPath),
    ];
    for (position, expected) in expectations.iter() {
        assert_eq!(&parsed[*position], expected);
    }
}
2701
/// Every operand-less path-painting operator should map to its dedicated variant.
#[test]
fn test_parser_painting_operations() {
    let parsed = ContentParser::parse(b"S s f f* B B* b b*").unwrap();

    // Expected variants, listed in the same order as the operators in the stream.
    let expected = vec![
        ContentOperation::Stroke,                 // S
        ContentOperation::CloseStroke,            // s
        ContentOperation::Fill,                   // f
        ContentOperation::FillEvenOdd,            // f*
        ContentOperation::FillStroke,             // B
        ContentOperation::FillStrokeEvenOdd,      // B*
        ContentOperation::CloseFillStroke,        // b
        ContentOperation::CloseFillStrokeEvenOdd, // b*
    ];

    assert_eq!(parsed.len(), 8);
    assert_eq!(parsed, expected);
}
2717
/// Line width, cap, join and miter limit operators should parse to their variants.
#[test]
fn test_parser_line_style_operations() {
    let parsed = ContentParser::parse(b"5 w 1 J 2 j 10 M [ 3 2 ] 0 d").unwrap();

    // Five operations are expected; the fifth (from `[ 3 2 ] 0 d`) is counted
    // but its value is not inspected by this test.
    assert_eq!(parsed.len(), 5);
    assert_eq!(
        parsed[..4],
        [
            ContentOperation::SetLineWidth(5.0),
            ContentOperation::SetLineCap(1),
            ContentOperation::SetLineJoin(2),
            ContentOperation::SetMiterLimit(10.0),
        ]
    );
}
2730
/// Text-state operators (`Tc`, `Tw`, `Tz`, `Tr`, `Ts`) should parse to their variants.
#[test]
fn test_parser_text_state_operations() {
    let parsed = ContentParser::parse(b"12 Tc 3 Tw 100 Tz 1 Tr 2 Ts").unwrap();

    let expected = vec![
        ContentOperation::SetCharSpacing(12.0),        // Tc
        ContentOperation::SetWordSpacing(3.0),         // Tw
        ContentOperation::SetHorizontalScaling(100.0), // Tz
        ContentOperation::SetTextRenderMode(1),        // Tr
        ContentOperation::SetTextRise(2.0),            // Ts
    ];

    assert_eq!(parsed.len(), 5);
    assert_eq!(parsed, expected);
}
2743
/// A literal string containing multi-byte UTF-8 sequences must still parse as one `ShowText`.
#[test]
fn test_parser_unicode_text() {
    // 0xC2 0xA9 and 0xE2 0x9C 0x93 are the UTF-8 encodings of U+00A9 and U+2713.
    let stream = b"BT (Hello \xC2\xA9 World \xE2\x9C\x93) Tj ET";
    let parsed = ContentParser::parse(stream).unwrap();

    assert_eq!(parsed.len(), 3);
    assert_eq!(parsed[0], ContentOperation::BeginText);
    if let ContentOperation::ShowText(bytes) = &parsed[1] {
        // Only a loose lower bound on the payload length is checked.
        assert!(bytes.len() > 5);
    } else {
        panic!("Expected ShowText operation");
    }
    assert_eq!(parsed[2], ContentOperation::EndText);
}
2759
/// Large-magnitude positive and negative operands should survive parsing within a 0.1 tolerance.
#[test]
fn test_parser_stress_test_large_coordinates() {
    let stream = b"999999.999 -999999.999 999999.999 -999999.999 999999.999 -999999.999 c";
    let parsed = ContentParser::parse(stream).unwrap();

    // Values are compared with a tolerance rather than for exact equality.
    let is_close = |actual: f32, target: f32| (actual - target).abs() < 0.1;

    assert_eq!(parsed.len(), 1);
    // Only the first control point and the x of the end point are checked.
    if let ContentOperation::CurveTo(x1, y1, _, _, x3, _) = &parsed[0] {
        assert!(is_close(*x1, 999999.999));
        assert!(is_close(*y1, -999999.999));
        assert!(is_close(*x3, 999999.999));
    } else {
        panic!("Expected CurveTo operation");
    }
}
2775
/// Empty and whitespace-only streams should parse successfully to zero operations.
#[test]
fn test_parser_empty_content_stream() {
    // Zero-length input.
    assert!(ContentParser::parse(b"").unwrap().is_empty());

    // Input made only of whitespace characters.
    assert!(ContentParser::parse(b" \n\t\r ").unwrap().is_empty());
}
2786
/// Smoke test: a 0xFF byte inside a comment must not make the parser panic.
#[test]
fn test_tokenizer_error_recovery() {
    let stream = b"100 200 m % Comment with\xFFbinary\n150 250 l";

    // Both `Ok` and `Err` are acceptable outcomes; the test fails only if the
    // call panics, so the result is deliberately discarded.
    let _ = ContentParser::parse(stream);
}
2795
/// Parses 1000 consecutive `m` operators and checks the count and a wall-clock budget.
#[test]
fn test_parser_optimization_repeated_operations() {
    // Build "0 0 m 1 2 m 2 4 m ..." up front so only parsing is timed.
    let content: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i * 2).into_bytes())
        .collect();

    let timer = std::time::Instant::now();
    let parsed = ContentParser::parse(&content).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(parsed.len(), 1000);
    // Budget: strictly less than 200 ms for the whole parse.
    assert!(elapsed < std::time::Duration::from_millis(200));
}
2811
/// A 10 000-byte literal string should be delivered intact in a single `ShowText`.
#[test]
fn test_parser_memory_efficiency_large_strings() {
    let content = format!("BT ({}) Tj ET", "A".repeat(10000));
    let parsed = ContentParser::parse(content.as_bytes()).unwrap();

    assert_eq!(parsed.len(), 3);
    // The middle operation (between BT and ET) carries the text payload.
    if let ContentOperation::ShowText(bytes) = &parsed[1] {
        assert_eq!(bytes.len(), 10000);
    } else {
        panic!("Expected ShowText operation");
    }
}
2827 }
2828
/// A stream with 10 000 `m` operators plus a final `S` should parse without error.
#[test]
fn test_content_stream_too_large() {
    // 10 000 MoveTo operations followed by one Stroke.
    let mut large_content: Vec<u8> = (0..10000)
        .flat_map(|i| format!("{} {} m ", i, i).into_bytes())
        .collect();
    large_content.extend_from_slice(b"S");

    let outcome = ContentParser::parse_content(&large_content);
    assert!(outcome.is_ok());

    // The trailing `S` pushes the total past the 10 000 MoveTo operations.
    let parsed = outcome.unwrap();
    assert!(parsed.len() > 10000);
}
2848
/// If a stream with an unknown operator parses at all, the valid `m` after it must be present.
#[test]
fn test_invalid_operator_handling() {
    let outcome = ContentParser::parse_content(b"100 200 INVALID_OP 300 400 m");

    // An `Err` is tolerated; only the success path has a requirement.
    if let Ok(parsed) = outcome {
        let has_move_to = parsed
            .iter()
            .any(|operation| matches!(operation, ContentOperation::MoveTo(..)));
        assert!(has_move_to);
    }
}
2863
/// Smoke test: mismatched brackets and parentheses in a `TJ` operand must not cause a panic.
#[test]
fn test_nested_arrays_malformed() {
    let stream = b"[[(Hello] [World)]] TJ";

    // Either `Ok` or `Err` is fine; the result is discarded on purpose and the
    // test fails only if the parser panics.
    let _ = ContentParser::parse_content(stream);
}
2873
/// Backslash escapes inside literal strings should decode to the expected raw bytes.
#[test]
fn test_escape_sequences_in_strings() {
    // Each entry is (literal string as it appears in the stream, decoded bytes).
    let cases: Vec<(&[u8], &[u8])> = vec![
        (&b"(\\n\\r\\t)"[..], &b"\n\r\t"[..]), // control-character escapes
        (&b"(\\\\)"[..], &b"\\"[..]),          // escaped backslash
        (&b"(\\(\\))"[..], &b"()"[..]),        // escaped parentheses
        (&b"(\\123)"[..], &b"S"[..]),          // three-digit octal (0o123 == b'S')
        (&b"(\\0)"[..], &b"\0"[..]),           // single-digit octal
    ];

    for (input, expected) in cases {
        // Append a `Tj` so the string is consumed as a ShowText operand.
        let content = [input, &b" Tj"[..]].concat();

        let outcome = ContentParser::parse_content(&content);
        assert!(outcome.is_ok());

        let parsed = outcome.unwrap();
        match &parsed[0] {
            ContentOperation::ShowText(text) => {
                assert_eq!(text, expected, "Failed for input: {:?}", input)
            }
            _ => panic!("Expected ShowText operation"),
        }
    }
}
2901
/// Smoke test: a small `BI … ID … EI` inline image must not make the parser panic.
#[test]
fn test_content_with_inline_images() {
    let stream = b"BI /W 10 /H 10 /CS /RGB ID \x00\x01\x02\x03 EI";

    // No particular outcome is required; the result is discarded and only a
    // panic would fail the test.
    let _ = ContentParser::parse_content(stream);
}
2911
/// Smoke test: operators given without any operands must not make the parser panic.
#[test]
fn test_operator_with_missing_operands() {
    // Bare operators with nothing preceding them.
    let streams: [&[u8]; 4] = [b"Tj", b"m", b"rg", b"Tf"];

    for stream in streams.iter() {
        // `Ok` and `Err` are both acceptable, so the result is dropped.
        let _ = ContentParser::parse_content(*stream);
    }
}
2928
/// Stray `{` and `}` between operators must not prevent `q` and `Q` from being tokenized.
#[test]
fn test_tokenizer_handles_curly_braces() {
    let mut tokenizer = ContentTokenizer::new(b"q { } Q");

    // Pull tokens until the tokenizer reports end of input; any error panics.
    let tokens: Vec<Token> =
        std::iter::from_fn(|| tokenizer.next_token().unwrap()).collect();

    for operator in ["q", "Q"].iter() {
        assert!(tokens.contains(&Token::Operator(String::from(*operator))));
    }
}
2947
/// An unmatched `)` between operators must not prevent `q` and `Q` from being tokenized.
#[test]
fn test_tokenizer_handles_closing_paren() {
    let mut tokenizer = ContentTokenizer::new(b"q ) Q");

    // Drain the tokenizer; `unwrap` turns any tokenizer error into a failure.
    let tokens: Vec<Token> =
        std::iter::from_fn(|| tokenizer.next_token().unwrap()).collect();

    let save = Token::Operator(String::from("q"));
    let restore = Token::Operator(String::from("Q"));
    assert!(tokens.contains(&save));
    assert!(tokens.contains(&restore));
}
2962
/// Inline image data containing `{` and `}` bytes must parse, and the `Q` after `EI` must survive.
#[test]
fn test_inline_image_binary_with_curly_braces() {
    // 0x7B and 0x7D are the ASCII codes of `{` and `}`.
    let stream = b"BI /W 2 /H 2 /BPC 8 /CS /G ID \x7B\x7D\x00\xFF EI Q";

    let outcome = ContentParser::parse_content(stream);
    assert!(
        outcome.is_ok(),
        "Parsing inline image with curly braces failed: {:?}",
        outcome.err()
    );
    let parsed = outcome.unwrap();

    // Both flags are computed before either is asserted.
    let found_image = parsed
        .iter()
        .any(|operation| matches!(operation, ContentOperation::InlineImage { .. }));
    let found_restore = parsed
        .iter()
        .any(|operation| matches!(operation, ContentOperation::RestoreGraphicsState));

    assert!(found_image, "Expected InlineImage operation");
    assert!(found_restore, "Expected RestoreGraphicsState after EI");
}
2986
/// Inline image data containing every byte value 0x00..=0xFF must parse without error.
#[test]
fn test_inline_image_binary_with_all_byte_values() {
    // Header, then all 256 byte values as image data, then the terminator.
    let mut content = b"BI /W 16 /H 16 /BPC 8 /CS /G ID ".to_vec();
    content.extend(0u8..=255);
    content.extend_from_slice(b" EI Q");

    let outcome = ContentParser::parse_content(&content);
    assert!(
        outcome.is_ok(),
        "Parsing inline image with all byte values failed: {:?}",
        outcome.err()
    );
}
3005
/// Image data that itself starts with the bytes `E` `I` must not be mistaken for the terminator.
#[test]
fn test_inline_image_ei_detection() {
    // 0x45 0x49 spell "EI"; here they are followed by 0x00 rather than whitespace.
    let stream = b"BI /W 2 /H 1 /BPC 8 /CS /G ID \x45\x49\x00\n EI Q";

    let outcome = ContentParser::parse_content(stream);
    assert!(outcome.is_ok(), "EI detection failed: {:?}", outcome.err());

    let parsed = outcome.unwrap();
    assert!(
        parsed
            .iter()
            .any(|operation| matches!(operation, ContentOperation::InlineImage { .. })),
        "Expected InlineImage operation"
    );
}
3021
/// A run of consecutive stray delimiters must be consumed in a bounded number of tokens.
#[test]
fn test_tokenizer_no_infinite_loop_on_consecutive_delimiters() {
    let mut tokenizer = ContentTokenizer::new(b"q {{{}}})))) Q");

    let mut tokens = Vec::new();
    while let Some(token) = tokenizer.next_token().unwrap() {
        tokens.push(token);
        // The input is short, so more than 100 tokens means the tokenizer is
        // not advancing through the input.
        assert!(
            tokens.len() <= 100,
            "Tokenizer produced too many tokens — possible infinite loop"
        );
    }

    let save = Token::Operator(String::from("q"));
    let restore = Token::Operator(String::from("Q"));
    assert!(tokens.contains(&save));
    assert!(tokens.contains(&restore));
}
3039
/// A lone inline image should parse to one `InlineImage` whose abbreviated keys are expanded.
#[test]
fn test_content_parser_inline_image_produces_correct_operation() {
    let stream = b"BI /W 4 /H 4 /BPC 8 /CS /G ID \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F EI";

    let outcome = ContentParser::parse_content(stream);
    assert!(outcome.is_ok(), "Parse failed: {:?}", outcome.err());

    let parsed = outcome.unwrap();
    assert_eq!(
        parsed.len(),
        1,
        "Expected exactly 1 operation, got {}",
        parsed.len()
    );

    match &parsed[0] {
        ContentOperation::InlineImage { params, data } => {
            // `/W`, `/H` and `/BPC` are expected under their full key names.
            assert_eq!(params.get("Width"), Some(&Object::Integer(4)));
            assert_eq!(params.get("Height"), Some(&Object::Integer(4)));
            assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(8)));
            assert!(!data.is_empty(), "Image data should not be empty");
        }
        _ => panic!("Expected InlineImage operation, got {:?}", parsed[0]),
    }
}
3064
/// `\777` (0o777 = 0x1FF) exceeds one byte; the expected result is its low eight bits, 0xFF.
#[test]
fn test_octal_escape_overflow_777() {
    let mut tokenizer = ContentTokenizer::new(b"(\\777)");
    let first = tokenizer.next_token().unwrap().unwrap();

    if let Token::String(data) = first {
        assert_eq!(data, vec![0xFF_u8]);
    } else {
        panic!("Expected string token");
    }
}
3077
/// `\400` (0o400 = 0x100) exceeds one byte; the expected result is its low eight bits, 0x00.
#[test]
fn test_octal_escape_overflow_400() {
    let mut tokenizer = ContentTokenizer::new(b"(\\400)");
    let first = tokenizer.next_token().unwrap().unwrap();

    if let Token::String(data) = first {
        assert_eq!(data, vec![0x00_u8]);
    } else {
        panic!("Expected string token");
    }
}
3089
/// `\577` (0o577 = 0x17F) exceeds one byte; the expected result is its low eight bits, 0x7F.
#[test]
fn test_octal_escape_overflow_577() {
    let mut tokenizer = ContentTokenizer::new(b"(\\577)");
    let first = tokenizer.next_token().unwrap().unwrap();

    if let Token::String(data) = first {
        assert_eq!(data, vec![0x7F_u8]);
    } else {
        panic!("Expected string token");
    }
}
3101
/// `\377` (0o377 = 0xFF) is the largest octal escape that fits in one byte and must decode as-is.
#[test]
fn test_octal_escape_max_valid_377() {
    let mut tokenizer = ContentTokenizer::new(b"(\\377)");
    let first = tokenizer.next_token().unwrap().unwrap();

    if let Token::String(data) = first {
        assert_eq!(data, vec![0xFF_u8]);
    } else {
        panic!("Expected string token");
    }
}
3112
/// An overflowing escape (`\777`) and an in-range one (`\101`) in the same string must both decode.
#[test]
fn test_octal_escape_overflow_mixed_with_valid() {
    let mut tokenizer = ContentTokenizer::new(b"(A\\777B\\101C)");
    let first = tokenizer.next_token().unwrap().unwrap();

    if let Token::String(data) = first {
        // `\777` is expected as 0xFF and `\101` (0o101) as b'A'; the plain
        // characters around them pass through unchanged.
        assert_eq!(data, b"A\xFFBAC".to_vec());
    } else {
        panic!("Expected string token");
    }
}
3125}