1use super::{ParseError, ParseResult};
57use crate::objects::Object;
58use std::collections::HashMap;
59
/// One decoded PDF content-stream operation.
///
/// Each variant corresponds to a content-stream operator (PDF 32000-1,
/// chapters 8 and 9); tuple fields carry the operator's operands in the
/// order the operator consumes them.
#[derive(Debug, Clone, PartialEq)]
pub enum ContentOperation {
    // --- Text object operators ---
    /// BT — begin a text object.
    BeginText,

    /// ET — end the current text object.
    EndText,

    // --- Text state operators ---
    /// Tc — set character spacing.
    SetCharSpacing(f32),

    /// Tw — set word spacing.
    SetWordSpacing(f32),

    /// Tz — set horizontal scaling (a percentage).
    SetHorizontalScaling(f32),

    /// TL — set text leading.
    SetLeading(f32),

    /// Tf — select a font by resource name, with a size.
    SetFont(String, f32),

    /// Tr — set the text rendering mode.
    SetTextRenderMode(i32),

    /// Ts — set text rise.
    SetTextRise(f32),

    // --- Text positioning operators ---
    /// Td — move the text position by (tx, ty).
    MoveText(f32, f32),

    /// TD — move the text position and set the leading.
    MoveTextSetLeading(f32, f32),

    /// Tm — set the text matrix (a, b, c, d, e, f).
    SetTextMatrix(f32, f32, f32, f32, f32, f32),

    /// T* — move to the start of the next line.
    NextLine,

    // --- Text showing operators ---
    // Strings are kept as raw bytes: their interpretation depends on the
    // current font's encoding, which is not known at parse time.
    /// Tj — show a text string.
    ShowText(Vec<u8>),

    /// TJ — show strings interleaved with positioning adjustments.
    ShowTextArray(Vec<TextElement>),

    /// ' — move to the next line, then show text.
    NextLineShowText(Vec<u8>),

    /// " — set word and character spacing, move to the next line, show text.
    SetSpacingNextLineShowText(f32, f32, Vec<u8>),

    // --- Graphics state operators ---
    /// q — push the graphics state.
    SaveGraphicsState,

    /// Q — pop the graphics state.
    RestoreGraphicsState,

    /// cm — concatenate a matrix onto the CTM.
    SetTransformMatrix(f32, f32, f32, f32, f32, f32),

    /// w — set line width.
    SetLineWidth(f32),

    /// J — set line cap style.
    SetLineCap(i32),

    /// j — set line join style.
    SetLineJoin(i32),

    /// M — set miter limit.
    SetMiterLimit(f32),

    /// d — set the dash pattern (array, phase).
    SetDashPattern(Vec<f32>, f32),

    /// ri — set the rendering intent.
    SetIntent(String),

    /// i — set the flatness tolerance.
    SetFlatness(f32),

    /// gs — apply a named ExtGState parameter dictionary.
    SetGraphicsStateParams(String),

    // --- Path construction operators ---
    /// m — begin a new subpath at (x, y).
    MoveTo(f32, f32),

    /// l — append a straight segment to (x, y).
    LineTo(f32, f32),

    /// c — cubic Bézier with two explicit control points.
    CurveTo(f32, f32, f32, f32, f32, f32),

    /// v — cubic Bézier; the first control point is the current point.
    CurveToV(f32, f32, f32, f32),

    /// y — cubic Bézier; the second control point is the endpoint.
    CurveToY(f32, f32, f32, f32),

    /// h — close the current subpath.
    ClosePath,

    /// re — append a rectangle (x, y, width, height).
    Rectangle(f32, f32, f32, f32),

    // --- Path painting operators ---
    /// S — stroke the path.
    Stroke,

    /// s — close and stroke.
    CloseStroke,

    /// f / F — fill (nonzero winding rule).
    Fill,

    /// f* — fill (even-odd rule).
    FillEvenOdd,

    /// B — fill then stroke (nonzero).
    FillStroke,

    /// B* — fill then stroke (even-odd).
    FillStrokeEvenOdd,

    /// b — close, fill, stroke (nonzero).
    CloseFillStroke,

    /// b* — close, fill, stroke (even-odd).
    CloseFillStrokeEvenOdd,

    /// n — end the path without painting (used after W / W*).
    EndPath,

    // --- Clipping and colour operators ---
    /// W — clip (nonzero); W* — clip (even-odd); CS — stroking colour space.
    Clip, ClipEvenOdd, SetStrokingColorSpace(String),

    /// cs — non-stroking colour space.
    SetNonStrokingColorSpace(String),

    /// SC / SCN — stroking colour components.
    SetStrokingColor(Vec<f32>),

    /// sc / scn — non-stroking colour components.
    SetNonStrokingColor(Vec<f32>),

    /// G — stroking gray level.
    SetStrokingGray(f32),

    /// g — non-stroking gray level.
    SetNonStrokingGray(f32),

    /// RG — stroking RGB colour.
    SetStrokingRGB(f32, f32, f32),

    /// rg — non-stroking RGB colour.
    SetNonStrokingRGB(f32, f32, f32),

    /// K — stroking CMYK colour.
    SetStrokingCMYK(f32, f32, f32, f32),

    /// k — non-stroking CMYK colour.
    SetNonStrokingCMYK(f32, f32, f32, f32),

    /// sh — paint a named shading; BI — begin an inline image.
    ShadingFill(String), BeginInlineImage,
    /// A complete inline image (`BI … ID … EI`).
    InlineImage {
        /// Image dictionary entries, with abbreviated keys expanded.
        params: HashMap<String, Object>,
        /// Raw (still encoded) image bytes between ID and EI.
        data: Vec<u8>,
    },

    /// Do — paint a named XObject.
    PaintXObject(String),

    // --- Marked-content and compatibility operators ---
    // BMC / BDC / EMC / MP / DP / BX / EX.
    BeginMarkedContent(String), BeginMarkedContentWithProps(String, HashMap<String, String>), EndMarkedContent, DefineMarkedContentPoint(String), DefineMarkedContentPointWithProps(String, HashMap<String, String>), BeginCompatibility, EndCompatibility, }
339
/// One element of a TJ (show-text-array) operand.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// A string to show, as raw bytes in the current font's encoding.
    Text(Vec<u8>),
    /// A numeric positioning adjustment between strings (per the PDF TJ
    /// operator, in thousandths of a text-space unit).
    Spacing(f32),
}
366
/// Lexical token produced by [`ContentTokenizer`].
#[derive(Debug, Clone, PartialEq)]
pub(super) enum Token {
    /// A number that contained a decimal point.
    Number(f32),
    /// A number without a decimal point.
    Integer(i32),
    /// A literal `( ... )` string with escapes resolved.
    String(Vec<u8>),
    /// A `< ... >` hex string decoded to bytes.
    HexString(Vec<u8>),
    /// A `/Name` with `#xx` escapes decoded.
    Name(String),
    /// Any other bare word — a content-stream operator.
    Operator(String),
    /// `[`
    ArrayStart,
    /// `]`
    ArrayEnd,
    /// `<<`
    DictStart,
    /// `>>`
    DictEnd,
    /// Raw bytes between an inline image's `ID` and `EI` operators.
    InlineImageData(Vec<u8>),
}
385
/// Streaming lexer over the raw bytes of a PDF content stream.
pub struct ContentTokenizer<'a> {
    /// The complete content stream being tokenized.
    input: &'a [u8],
    /// Byte offset of the next character to examine.
    position: usize,
    /// Set after an `ID` operator is emitted; makes the next call return the
    /// raw inline-image data instead of ordinary tokens.
    in_inline_image: bool,
}
394
395impl<'a> ContentTokenizer<'a> {
396 pub fn new(input: &'a [u8]) -> Self {
398 Self {
399 input,
400 position: 0,
401 in_inline_image: false,
402 }
403 }
404
405 pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
407 if self.in_inline_image {
409 self.in_inline_image = false;
410 return self.read_inline_image_data();
411 }
412
413 self.skip_whitespace();
414
415 if self.position >= self.input.len() {
416 return Ok(None);
417 }
418
419 let ch = self.input[self.position];
420
421 match ch {
422 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
424
425 b'(' => self.read_literal_string(),
427 b'<' => {
428 if self.peek_next() == Some(b'<') {
429 self.position += 2;
430 Ok(Some(Token::DictStart))
431 } else {
432 self.read_hex_string()
433 }
434 }
435 b'>' => {
436 if self.peek_next() == Some(b'>') {
437 self.position += 2;
438 Ok(Some(Token::DictEnd))
439 } else {
440 Err(ParseError::SyntaxError {
441 position: self.position,
442 message: "Unexpected '>'".to_string(),
443 })
444 }
445 }
446
447 b'[' => {
449 self.position += 1;
450 Ok(Some(Token::ArrayStart))
451 }
452 b']' => {
453 self.position += 1;
454 Ok(Some(Token::ArrayEnd))
455 }
456
457 b'/' => self.read_name(),
459
460 b';' | b')' | b'{' | b'}' => {
465 self.position += 1;
466 self.next_token() }
468
469 _ => {
471 let token = self.read_operator()?;
472 if let Some(Token::Operator(ref op)) = token {
474 if op == "ID" {
475 self.in_inline_image = true;
476 }
477 }
478 Ok(token)
479 }
480 }
481 }
482
483 fn skip_whitespace(&mut self) {
484 while self.position < self.input.len() {
485 match self.input[self.position] {
486 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
487 b'%' => self.skip_comment(),
488 _ => break,
489 }
490 }
491 }
492
493 fn skip_comment(&mut self) {
494 while self.position < self.input.len() && self.input[self.position] != b'\n' {
495 self.position += 1;
496 }
497 }
498
499 fn peek_next(&self) -> Option<u8> {
500 if self.position + 1 < self.input.len() {
501 Some(self.input[self.position + 1])
502 } else {
503 None
504 }
505 }
506
507 fn read_number(&mut self) -> ParseResult<Option<Token>> {
508 let start = self.position;
509 let mut has_dot = false;
510
511 if self.position < self.input.len()
513 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
514 {
515 self.position += 1;
516 }
517
518 while self.position < self.input.len() {
520 match self.input[self.position] {
521 b'0'..=b'9' => self.position += 1,
522 b'.' if !has_dot => {
523 has_dot = true;
524 self.position += 1;
525 }
526 _ => break,
527 }
528 }
529
530 let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
531 ParseError::SyntaxError {
532 position: start,
533 message: "Invalid number format".to_string(),
534 }
535 })?;
536
537 if has_dot {
538 let value = num_str
539 .parse::<f32>()
540 .map_err(|_| ParseError::SyntaxError {
541 position: start,
542 message: "Invalid float number".to_string(),
543 })?;
544 Ok(Some(Token::Number(value)))
545 } else {
546 let value = num_str
547 .parse::<i32>()
548 .map_err(|_| ParseError::SyntaxError {
549 position: start,
550 message: "Invalid integer number".to_string(),
551 })?;
552 Ok(Some(Token::Integer(value)))
553 }
554 }
555
556 fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
557 self.position += 1; let mut result = Vec::new();
559 let mut paren_depth = 1;
560 let mut escape = false;
561
562 while self.position < self.input.len() && paren_depth > 0 {
563 let ch = self.input[self.position];
564 self.position += 1;
565
566 if escape {
567 match ch {
568 b'n' => result.push(b'\n'),
569 b'r' => result.push(b'\r'),
570 b't' => result.push(b'\t'),
571 b'b' => result.push(b'\x08'),
572 b'f' => result.push(b'\x0C'),
573 b'(' => result.push(b'('),
574 b')' => result.push(b')'),
575 b'\\' => result.push(b'\\'),
576 b'0'..=b'7' => {
577 self.position -= 1;
579 let octal_value = self.read_octal_escape()?;
580 result.push(octal_value);
581 }
582 _ => result.push(ch), }
584 escape = false;
585 } else {
586 match ch {
587 b'\\' => escape = true,
588 b'(' => {
589 paren_depth += 1;
590 result.push(ch);
591 }
592 b')' => {
593 paren_depth -= 1;
594 if paren_depth > 0 {
595 result.push(ch);
596 }
597 }
598 _ => result.push(ch),
599 }
600 }
601 }
602
603 Ok(Some(Token::String(result)))
604 }
605
606 fn read_octal_escape(&mut self) -> ParseResult<u8> {
607 let mut value = 0u16;
610 let mut count = 0;
611
612 while count < 3 && self.position < self.input.len() {
613 match self.input[self.position] {
614 b'0'..=b'7' => {
615 value = value * 8 + u16::from(self.input[self.position] - b'0');
616 self.position += 1;
617 count += 1;
618 }
619 _ => break,
620 }
621 }
622
623 Ok(value as u8)
624 }
625
626 fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
627 self.position += 1; let mut result = Vec::new();
629 let mut nibble = None;
630
631 while self.position < self.input.len() {
632 let ch = self.input[self.position];
633
634 match ch {
635 b'>' => {
636 self.position += 1;
637 if let Some(n) = nibble {
639 result.push(n << 4);
640 }
641 return Ok(Some(Token::HexString(result)));
642 }
643 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
644 let digit = if ch <= b'9' {
645 ch - b'0'
646 } else if ch <= b'F' {
647 ch - b'A' + 10
648 } else {
649 ch - b'a' + 10
650 };
651
652 if let Some(n) = nibble {
653 result.push((n << 4) | digit);
654 nibble = None;
655 } else {
656 nibble = Some(digit);
657 }
658 self.position += 1;
659 }
660 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
661 self.position += 1;
663 }
664 _ => {
665 return Err(ParseError::SyntaxError {
666 position: self.position,
667 message: format!("Invalid character in hex string: {:?}", ch as char),
668 });
669 }
670 }
671 }
672
673 Err(ParseError::SyntaxError {
674 position: self.position,
675 message: "Unterminated hex string".to_string(),
676 })
677 }
678
    /// Lexes a `/Name` token (cursor on the slash).
    ///
    /// Scans until a delimiter or whitespace byte; `#xx` hex escapes are
    /// skipped here as three raw bytes and decoded later by `decode_name`.
    fn read_name(&mut self) -> ParseResult<Option<Token>> {
        self.position += 1; // skip '/'
        let start = self.position;

        while self.position < self.input.len() {
            let ch = self.input[self.position];
            match ch {
                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
                | b']' | b'{' | b'}' | b'/' | b'%' => break,
                b'#' => {
                    self.position += 1;
                    // Skip the two escape characters when both are present.
                    // NOTE(review): they are not validated as hex digits here
                    // and may even be delimiters; decode_name rejects
                    // non-hex escapes. A truncated escape at end of input is
                    // left in place and passed through literally.
                    if self.position + 1 < self.input.len() {
                        self.position += 2;
                    }
                }
                _ => self.position += 1,
            }
        }

        let name_bytes = &self.input[start..self.position];
        let name = self.decode_name(name_bytes)?;
        Ok(Some(Token::Name(name)))
    }
703
704 fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
705 let mut result = Vec::new();
706 let mut i = 0;
707
708 while i < bytes.len() {
709 if bytes[i] == b'#' && i + 2 < bytes.len() {
710 let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
712 ParseError::SyntaxError {
713 position: self.position,
714 message: "Invalid hex escape in name".to_string(),
715 }
716 })?;
717 let value =
718 u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
719 position: self.position,
720 message: "Invalid hex escape in name".to_string(),
721 })?;
722 result.push(value);
723 i += 3;
724 } else {
725 result.push(bytes[i]);
726 i += 1;
727 }
728 }
729
730 String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
731 position: self.position,
732 message: "Invalid UTF-8 in name".to_string(),
733 })
734 }
735
736 fn read_operator(&mut self) -> ParseResult<Option<Token>> {
737 let start = self.position;
738
739 while self.position < self.input.len() {
740 let ch = self.input[self.position];
741 match ch {
742 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
743 | b']' | b'{' | b'}' | b'/' | b'%' | b';' => break,
744 _ => self.position += 1,
745 }
746 }
747
748 let op_bytes = &self.input[start..self.position];
749 let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
750 position: start,
751 message: "Invalid operator".to_string(),
752 })?;
753
754 Ok(Some(Token::Operator(op.to_string())))
755 }
756
    /// Collects the raw bytes between an `ID` operator and its matching `EI`.
    ///
    /// `EI` is only accepted as a terminator when it is preceded by
    /// whitespace (or is the first data byte) and followed by a whitespace
    /// or delimiter byte — this keeps an `EI` byte pair that happens to
    /// occur inside binary image data from ending the scan early. The single
    /// whitespace byte separating the data from `EI` is excluded from the
    /// result. If no terminator is found, the rest of the input is returned
    /// leniently.
    fn read_inline_image_data(&mut self) -> ParseResult<Option<Token>> {
        // Skip the single whitespace byte after `ID` (\r\n counts as one).
        if self.position < self.input.len() {
            let ch = self.input[self.position];
            if ch == b' ' || ch == b'\n' || ch == b'\r' || ch == b'\t' {
                self.position += 1;
                if ch == b'\r'
                    && self.position < self.input.len()
                    && self.input[self.position] == b'\n'
                {
                    self.position += 1;
                }
            }
        }

        let start = self.position;

        while self.position + 1 < self.input.len() {
            let preceded_by_whitespace = self.position == start
                || matches!(
                    self.input[self.position - 1],
                    b' ' | b'\t' | b'\r' | b'\n' | b'\x0C'
                );

            if preceded_by_whitespace
                && self.input[self.position] == b'E'
                && self.input[self.position + 1] == b'I'
            {
                let after_ei = self.position + 2;
                let followed_by_boundary = after_ei >= self.input.len()
                    || matches!(
                        self.input[after_ei],
                        b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'/' | b'<' | b'(' | b'[' | b'%'
                    );

                if followed_by_boundary {
                    // Drop the single whitespace byte before `EI`, if any.
                    let mut end = self.position;
                    if end > start
                        && matches!(self.input[end - 1], b' ' | b'\t' | b'\r' | b'\n' | b'\x0C')
                    {
                        end -= 1;
                    }
                    let data = self.input[start..end].to_vec();
                    // Resume ordinary tokenizing right after `EI`.
                    self.position = after_ei;
                    return Ok(Some(Token::InlineImageData(data)));
                }
            }
            self.position += 1;
        }

        // No terminator found: return everything up to end of input.
        let data = self.input[start..].to_vec();
        self.position = self.input.len();
        Ok(Some(Token::InlineImageData(data)))
    }
820}
821
/// Turns the token stream produced by [`ContentTokenizer`] into a list of
/// [`ContentOperation`]s using an operand stack.
pub struct ContentParser {
    /// All tokens of the content stream, in source order.
    tokens: Vec<Token>,
    /// Index of the next token to consume.
    position: usize,
}
844
845impl ContentParser {
    /// Creates an empty parser.
    ///
    /// NOTE(review): the `content` argument is currently ignored; use the
    /// associated function [`ContentParser::parse`] to parse a stream.
    pub fn new(_content: &[u8]) -> Self {
        Self {
            tokens: Vec::new(),
            position: 0,
        }
    }
853
    /// Parses a raw content stream into a list of operations.
    ///
    /// Convenience alias for [`Self::parse_content`].
    pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
        Self::parse_content(content)
    }
890
891 pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
896 let mut tokenizer = ContentTokenizer::new(content);
897 let mut tokens = Vec::new();
898
899 while let Some(token) = tokenizer.next_token()? {
901 tokens.push(token);
902 }
903
904 let mut parser = Self {
905 tokens,
906 position: 0,
907 };
908
909 parser.parse_operators()
910 }
911
912 fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
913 let mut operators = Vec::new();
914 let mut operand_stack: Vec<Token> = Vec::new();
915
916 while self.position < self.tokens.len() {
917 let token = self.tokens[self.position].clone();
918 self.position += 1;
919
920 match &token {
921 Token::Operator(op) => {
922 let operator = self.parse_operator(op, &mut operand_stack)?;
923 operators.push(operator);
924 }
925 _ => {
926 operand_stack.push(token);
928 }
929 }
930 }
931
932 Ok(operators)
933 }
934
    /// Converts one operator name plus the current operand stack into a
    /// [`ContentOperation`].
    ///
    /// Operands are popped, so multi-operand operators read them in reverse
    /// of the order they appear in the stream. The stack is cleared after
    /// every operator, silently dropping any excess operands (lenient
    /// handling of malformed streams). Unknown operators are an error.
    fn parse_operator(
        &mut self,
        op: &str,
        operands: &mut Vec<Token>,
    ) -> ParseResult<ContentOperation> {
        let operator = match op {
            // Text object operators.
            "BT" => ContentOperation::BeginText,
            "ET" => ContentOperation::EndText,

            // Text state operators.
            "Tc" => {
                let spacing = self.pop_number(operands)?;
                ContentOperation::SetCharSpacing(spacing)
            }
            "Tw" => {
                let spacing = self.pop_number(operands)?;
                ContentOperation::SetWordSpacing(spacing)
            }
            "Tz" => {
                let scale = self.pop_number(operands)?;
                ContentOperation::SetHorizontalScaling(scale)
            }
            "TL" => {
                let leading = self.pop_number(operands)?;
                ContentOperation::SetLeading(leading)
            }
            "Tf" => {
                // Stream order: /Font size Tf — size is on top.
                let size = self.pop_number(operands)?;
                let font = self.pop_name(operands)?;
                ContentOperation::SetFont(font, size)
            }
            "Tr" => {
                let mode = self.pop_integer(operands)?;
                ContentOperation::SetTextRenderMode(mode)
            }
            "Ts" => {
                let rise = self.pop_number(operands)?;
                ContentOperation::SetTextRise(rise)
            }

            // Text positioning operators (operands pop in reverse).
            "Td" => {
                let ty = self.pop_number(operands)?;
                let tx = self.pop_number(operands)?;
                ContentOperation::MoveText(tx, ty)
            }
            "TD" => {
                let ty = self.pop_number(operands)?;
                let tx = self.pop_number(operands)?;
                ContentOperation::MoveTextSetLeading(tx, ty)
            }
            "Tm" => {
                let f = self.pop_number(operands)?;
                let e = self.pop_number(operands)?;
                let d = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                let b = self.pop_number(operands)?;
                let a = self.pop_number(operands)?;
                ContentOperation::SetTextMatrix(a, b, c, d, e, f)
            }
            "T*" => ContentOperation::NextLine,

            // Text showing operators.
            "Tj" => {
                let text = self.pop_string(operands)?;
                ContentOperation::ShowText(text)
            }
            "TJ" => {
                let array = self.pop_array(operands)?;
                let elements = self.parse_text_array(array)?;
                ContentOperation::ShowTextArray(elements)
            }
            "'" => {
                let text = self.pop_string(operands)?;
                ContentOperation::NextLineShowText(text)
            }
            "\"" => {
                // Stream order: aw ac string ". The local names are swapped
                // relative to the PDF operand names, but the constructed
                // tuple receives (aw, ac, text) in stream order.
                let text = self.pop_string(operands)?;
                let aw = self.pop_number(operands)?;
                let ac = self.pop_number(operands)?;
                ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
            }

            // Graphics state operators.
            "q" => ContentOperation::SaveGraphicsState,
            "Q" => ContentOperation::RestoreGraphicsState,
            "cm" => {
                let f = self.pop_number(operands)?;
                let e = self.pop_number(operands)?;
                let d = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                let b = self.pop_number(operands)?;
                let a = self.pop_number(operands)?;
                ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
            }
            "w" => {
                let width = self.pop_number(operands)?;
                ContentOperation::SetLineWidth(width)
            }
            "J" => {
                let cap = self.pop_integer(operands)?;
                ContentOperation::SetLineCap(cap)
            }
            "j" => {
                let join = self.pop_integer(operands)?;
                ContentOperation::SetLineJoin(join)
            }
            "M" => {
                let limit = self.pop_number(operands)?;
                ContentOperation::SetMiterLimit(limit)
            }
            "d" => {
                // Stream order: [array] phase d — phase is on top.
                let phase = self.pop_number(operands)?;
                let array = self.pop_array(operands)?;
                let pattern = self.parse_dash_array(array)?;
                ContentOperation::SetDashPattern(pattern, phase)
            }
            "ri" => {
                let intent = self.pop_name(operands)?;
                ContentOperation::SetIntent(intent)
            }
            "i" => {
                let flatness = self.pop_number(operands)?;
                ContentOperation::SetFlatness(flatness)
            }
            "gs" => {
                let name = self.pop_name(operands)?;
                ContentOperation::SetGraphicsStateParams(name)
            }

            // Path construction operators.
            "m" => {
                let y = self.pop_number(operands)?;
                let x = self.pop_number(operands)?;
                ContentOperation::MoveTo(x, y)
            }
            "l" => {
                let y = self.pop_number(operands)?;
                let x = self.pop_number(operands)?;
                ContentOperation::LineTo(x, y)
            }
            "c" => {
                let y3 = self.pop_number(operands)?;
                let x3 = self.pop_number(operands)?;
                let y2 = self.pop_number(operands)?;
                let x2 = self.pop_number(operands)?;
                let y1 = self.pop_number(operands)?;
                let x1 = self.pop_number(operands)?;
                ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
            }
            "v" => {
                let y3 = self.pop_number(operands)?;
                let x3 = self.pop_number(operands)?;
                let y2 = self.pop_number(operands)?;
                let x2 = self.pop_number(operands)?;
                ContentOperation::CurveToV(x2, y2, x3, y3)
            }
            "y" => {
                let y3 = self.pop_number(operands)?;
                let x3 = self.pop_number(operands)?;
                let y1 = self.pop_number(operands)?;
                let x1 = self.pop_number(operands)?;
                ContentOperation::CurveToY(x1, y1, x3, y3)
            }
            "h" => ContentOperation::ClosePath,
            "re" => {
                let height = self.pop_number(operands)?;
                let width = self.pop_number(operands)?;
                let y = self.pop_number(operands)?;
                let x = self.pop_number(operands)?;
                ContentOperation::Rectangle(x, y, width, height)
            }

            // Path painting operators.
            "S" => ContentOperation::Stroke,
            "s" => ContentOperation::CloseStroke,
            "f" | "F" => ContentOperation::Fill,
            "f*" => ContentOperation::FillEvenOdd,
            "B" => ContentOperation::FillStroke,
            "B*" => ContentOperation::FillStrokeEvenOdd,
            "b" => ContentOperation::CloseFillStroke,
            "b*" => ContentOperation::CloseFillStrokeEvenOdd,
            "n" => ContentOperation::EndPath,

            // Clipping operators.
            "W" => ContentOperation::Clip,
            "W*" => ContentOperation::ClipEvenOdd,

            // Colour operators.
            "CS" => {
                let name = self.pop_name(operands)?;
                ContentOperation::SetStrokingColorSpace(name)
            }
            "cs" => {
                let name = self.pop_name(operands)?;
                ContentOperation::SetNonStrokingColorSpace(name)
            }
            "SC" | "SCN" => {
                let components = self.pop_color_components(operands)?;
                ContentOperation::SetStrokingColor(components)
            }
            "sc" | "scn" => {
                let components = self.pop_color_components(operands)?;
                ContentOperation::SetNonStrokingColor(components)
            }
            "G" => {
                let gray = self.pop_number(operands)?;
                ContentOperation::SetStrokingGray(gray)
            }
            "g" => {
                let gray = self.pop_number(operands)?;
                ContentOperation::SetNonStrokingGray(gray)
            }
            "RG" => {
                let b = self.pop_number(operands)?;
                let g = self.pop_number(operands)?;
                let r = self.pop_number(operands)?;
                ContentOperation::SetStrokingRGB(r, g, b)
            }
            "rg" => {
                let b = self.pop_number(operands)?;
                let g = self.pop_number(operands)?;
                let r = self.pop_number(operands)?;
                ContentOperation::SetNonStrokingRGB(r, g, b)
            }
            "K" => {
                let k = self.pop_number(operands)?;
                let y = self.pop_number(operands)?;
                let m = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                ContentOperation::SetStrokingCMYK(c, m, y, k)
            }
            "k" => {
                let k = self.pop_number(operands)?;
                let y = self.pop_number(operands)?;
                let m = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                ContentOperation::SetNonStrokingCMYK(c, m, y, k)
            }

            // Shading.
            "sh" => {
                let name = self.pop_name(operands)?;
                ContentOperation::ShadingFill(name)
            }

            // XObjects.
            "Do" => {
                let name = self.pop_name(operands)?;
                ContentOperation::PaintXObject(name)
            }

            // Marked content.
            "BMC" => {
                let tag = self.pop_name(operands)?;
                ContentOperation::BeginMarkedContent(tag)
            }
            "BDC" => {
                let props = self.pop_dict_or_name(operands)?;
                let tag = self.pop_name(operands)?;
                ContentOperation::BeginMarkedContentWithProps(tag, props)
            }
            "EMC" => ContentOperation::EndMarkedContent,
            "MP" => {
                let tag = self.pop_name(operands)?;
                ContentOperation::DefineMarkedContentPoint(tag)
            }
            "DP" => {
                let props = self.pop_dict_or_name(operands)?;
                let tag = self.pop_name(operands)?;
                ContentOperation::DefineMarkedContentPointWithProps(tag, props)
            }

            // Compatibility sections.
            "BX" => ContentOperation::BeginCompatibility,
            "EX" => ContentOperation::EndCompatibility,

            // Inline images: the dictionary is parsed from the token list,
            // not from the operand stack.
            "BI" => {
                operands.clear();
                self.parse_inline_image()?
            }

            _ => {
                return Err(ParseError::SyntaxError {
                    position: self.position,
                    message: format!("Unknown operator: {op}"),
                });
            }
        };

        // Drop any operands the operator did not consume.
        operands.clear();
        Ok(operator)
    }
1230
1231 fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1233 match operands.pop() {
1234 Some(Token::Number(n)) => Ok(n),
1235 Some(Token::Integer(i)) => Ok(i as f32),
1236 _ => Err(ParseError::SyntaxError {
1237 position: self.position,
1238 message: "Expected number operand".to_string(),
1239 }),
1240 }
1241 }
1242
1243 fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1244 match operands.pop() {
1245 Some(Token::Integer(i)) => Ok(i),
1246 _ => Err(ParseError::SyntaxError {
1247 position: self.position,
1248 message: "Expected integer operand".to_string(),
1249 }),
1250 }
1251 }
1252
1253 fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1254 match operands.pop() {
1255 Some(Token::Name(n)) => Ok(n),
1256 _ => Err(ParseError::SyntaxError {
1257 position: self.position,
1258 message: "Expected name operand".to_string(),
1259 }),
1260 }
1261 }
1262
1263 fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1264 match operands.pop() {
1265 Some(Token::String(s)) => Ok(s),
1266 Some(Token::HexString(s)) => Ok(s),
1267 _ => Err(ParseError::SyntaxError {
1268 position: self.position,
1269 message: "Expected string operand".to_string(),
1270 }),
1271 }
1272 }
1273
    /// Pops tokens back to the matching `[` marker and returns the contents
    /// in source order.
    ///
    /// NOTE(review): any nested `]` markers encountered on the way are
    /// silently discarded, so nested arrays are flattened into the result
    /// rather than preserved as structure.
    fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
        // Discard the trailing `]` marker if it sits on top of the stack.
        let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
        if has_array_end {
            operands.pop();
        }

        let mut array = Vec::new();
        let mut found_start = false;

        while let Some(token) = operands.pop() {
            match token {
                Token::ArrayStart => {
                    found_start = true;
                    break;
                }
                Token::ArrayEnd => {
                    // Nested array end: dropped (see note above).
                    continue;
                }
                _ => array.push(token),
            }
        }

        if !found_start {
            return Err(ParseError::SyntaxError {
                position: self.position,
                message: "Expected array".to_string(),
            });
        }

        // Elements were popped in reverse; restore source order.
        array.reverse();
        Ok(array)
    }
1309
    /// Pops the properties operand of a BDC/DP operator: either a name
    /// referencing a properties dictionary in the page resources, or an
    /// inline `<< ... >>` dictionary whose tokens are still on the stack.
    ///
    /// A name is encoded as a single `"__resource_ref"` entry. An inline
    /// dictionary is walked value-first (the stack holds it in source
    /// order), stringifying each value; a nested array collapses into a
    /// `"[a, b, ...]"` string.
    fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
        if let Some(token) = operands.pop() {
            match token {
                Token::Name(name) => {
                    // Named reference into the page's /Properties resources.
                    let mut props = HashMap::new();
                    props.insert("__resource_ref".to_string(), name);
                    Ok(props)
                }
                Token::DictEnd => {
                    let mut props = HashMap::new();

                    // Consume (value, key) pairs backwards until `<<`.
                    while let Some(value_token) = operands.pop() {
                        if matches!(value_token, Token::DictStart) {
                            break;
                        }

                        let value = match &value_token {
                            Token::Name(name) => name.clone(),
                            Token::String(s) => String::from_utf8_lossy(s).to_string(),
                            Token::Integer(i) => i.to_string(),
                            Token::Number(f) => f.to_string(),
                            Token::ArrayEnd => {
                                // Gather array elements back to the `[`.
                                let mut array_elements = Vec::new();
                                while let Some(arr_token) = operands.pop() {
                                    match arr_token {
                                        Token::ArrayStart => break,
                                        Token::Name(n) => array_elements.push(n),
                                        Token::String(s) => array_elements
                                            .push(String::from_utf8_lossy(&s).to_string()),
                                        Token::Integer(i) => array_elements.push(i.to_string()),
                                        Token::Number(f) => array_elements.push(f.to_string()),
                                        _ => {} // unsupported element: skipped
                                    }
                                }
                                array_elements.reverse();
                                format!("[{}]", array_elements.join(", "))
                            }
                            // NOTE(review): `continue` skips the key pop
                            // below, so the key token of an unsupported value
                            // is read as the next iteration's value.
                            _ => continue,
                        };

                        if let Some(Token::Name(key)) = operands.pop() {
                            props.insert(key, value);
                        }
                    }

                    Ok(props)
                }
                _ => {
                    // Unexpected operand kind: treated as empty properties.
                    Ok(HashMap::new())
                }
            }
        } else {
            Err(ParseError::SyntaxError {
                position: 0,
                message: "Expected dictionary or name for marked content properties".to_string(),
            })
        }
    }
1381
1382 fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1383 let mut components = Vec::new();
1384
1385 while let Some(token) = operands.last() {
1387 match token {
1388 Token::Number(n) => {
1389 components.push(*n);
1390 operands.pop();
1391 }
1392 Token::Integer(i) => {
1393 components.push(*i as f32);
1394 operands.pop();
1395 }
1396 _ => break,
1397 }
1398 }
1399
1400 components.reverse();
1401 Ok(components)
1402 }
1403
1404 fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1405 let mut elements = Vec::new();
1406
1407 for token in tokens {
1408 match token {
1409 Token::String(s) | Token::HexString(s) => {
1410 elements.push(TextElement::Text(s));
1411 }
1412 Token::Number(n) => {
1413 elements.push(TextElement::Spacing(n));
1414 }
1415 Token::Integer(i) => {
1416 elements.push(TextElement::Spacing(i as f32));
1417 }
1418 _ => {
1419 return Err(ParseError::SyntaxError {
1420 position: self.position,
1421 message: "Invalid element in text array".to_string(),
1422 });
1423 }
1424 }
1425 }
1426
1427 Ok(elements)
1428 }
1429
1430 fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1431 let mut pattern = Vec::new();
1432
1433 for token in tokens {
1434 match token {
1435 Token::Number(n) => pattern.push(n),
1436 Token::Integer(i) => pattern.push(i as f32),
1437 _ => {
1438 return Err(ParseError::SyntaxError {
1439 position: self.position,
1440 message: "Invalid element in dash array".to_string(),
1441 });
1442 }
1443 }
1444 }
1445
1446 Ok(pattern)
1447 }
1448
    /// Parses the body of an inline image after a `BI` operator.
    ///
    /// Consumes `/Key value` pairs from the token list until the `ID`
    /// operator, expanding abbreviated keys and name values to their full
    /// forms, then takes the following raw-data token. When the tokenizer
    /// did not produce an `InlineImageData` token, the data is reassembled
    /// from ordinary tokens as a fallback.
    fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
        let mut params = HashMap::new();

        // Parameter dictionary: name/value pairs up to `ID`.
        while self.position < self.tokens.len() {
            if let Token::Operator(op) = &self.tokens[self.position] {
                if op == "ID" {
                    self.position += 1;
                    break;
                }
            }

            if let Token::Name(key) = &self.tokens[self.position] {
                self.position += 1;
                if self.position >= self.tokens.len() {
                    break;
                }

                // Convert the value token to an Object; unsupported token
                // kinds become Null.
                let value = match &self.tokens[self.position] {
                    Token::Integer(n) => Object::Integer(*n as i64),
                    Token::Number(n) => Object::Real(*n as f64),
                    Token::Name(s) => Object::Name(expand_inline_name(s)),
                    Token::String(s) => Object::String(String::from_utf8_lossy(s).to_string()),
                    Token::HexString(s) => Object::String(String::from_utf8_lossy(s).to_string()),
                    _ => Object::Null,
                };

                // Abbreviated keys (W, H, BPC, ...) expand to full names.
                let full_key = expand_inline_key(key);
                params.insert(full_key, value);
                self.position += 1;
            } else {
                // Not a name: skip stray tokens between parameters.
                self.position += 1;
            }
        }

        // Image data: normally a single InlineImageData token emitted by the
        // tokenizer right after `ID`.
        let data = if self.position < self.tokens.len() {
            if let Token::InlineImageData(bytes) = &self.tokens[self.position] {
                let d = bytes.clone();
                self.position += 1;
                d
            } else {
                self.collect_inline_image_data_from_tokens()?
            }
        } else {
            Vec::new()
        };

        Ok(ContentOperation::InlineImage { params, data })
    }
1508
    /// Fallback for inline-image data that was tokenized as ordinary tokens:
    /// re-serializes tokens up to the `EI` operator into a byte buffer.
    ///
    /// NOTE(review): this reconstruction is lossy — the original whitespace
    /// and string/name delimiters are not restored — so it is only a
    /// best-effort recovery path.
    fn collect_inline_image_data_from_tokens(&mut self) -> ParseResult<Vec<u8>> {
        let mut data = Vec::new();
        while self.position < self.tokens.len() {
            if let Token::Operator(op) = &self.tokens[self.position] {
                if op == "EI" {
                    self.position += 1;
                    break;
                }
            }
            match &self.tokens[self.position] {
                Token::String(bytes) | Token::HexString(bytes) => {
                    data.extend_from_slice(bytes);
                }
                Token::Integer(n) => data.extend_from_slice(n.to_string().as_bytes()),
                Token::Number(n) => data.extend_from_slice(n.to_string().as_bytes()),
                Token::Name(s) | Token::Operator(s) => data.extend_from_slice(s.as_bytes()),
                _ => {} // structural tokens contribute no bytes
            }
            self.position += 1;
        }
        Ok(data)
    }
1533}
1534
/// Expands an abbreviated inline-image dictionary key (PDF 32000-1 §8.9.7)
/// to its full name; unrecognized keys pass through unchanged.
fn expand_inline_key(key: &str) -> String {
    let expanded = match key {
        "W" => "Width",
        "H" => "Height",
        "CS" | "ColorSpace" => "ColorSpace",
        "BPC" | "BitsPerComponent" => "BitsPerComponent",
        "F" => "Filter",
        "DP" | "DecodeParms" => "DecodeParms",
        "IM" => "ImageMask",
        "I" => "Interpolate",
        "Intent" => "Intent",
        "D" => "Decode",
        other => other,
    };
    expanded.to_string()
}
1551
/// Expands an abbreviated inline-image name value — a colour-space or
/// filter abbreviation (PDF 32000-1 §8.9.7) — to its full name;
/// unrecognized names pass through unchanged.
fn expand_inline_name(name: &str) -> String {
    let expanded = match name {
        "G" => "DeviceGray",
        "RGB" => "DeviceRGB",
        "CMYK" => "DeviceCMYK",
        "I" => "Indexed",
        "AHx" => "ASCIIHexDecode",
        "A85" => "ASCII85Decode",
        "LZW" => "LZWDecode",
        "Fl" => "FlateDecode",
        "RL" => "RunLengthDecode",
        "DCT" => "DCTDecode",
        "CCF" => "CCITTFaxDecode",
        other => other,
    };
    expanded.to_string()
}
1569
1570#[cfg(test)]
1571mod tests {
1572 use super::*;
1573
    // Integers lex as Token::Integer; anything containing a decimal point
    // (including a bare leading dot) lexes as Token::Number; end of input
    // yields None.
    #[test]
    fn test_tokenize_numbers() {
        let input = b"123 -45 3.14159 -0.5 .5";
        let mut tokenizer = ContentTokenizer::new(input);

        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(123)));
        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(-45)));
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Number(3.14159))
        );
        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
        assert_eq!(tokenizer.next_token().unwrap(), None);
    }
1589
    // Literal strings: backslash escapes are resolved and balanced nested
    // parentheses are kept as content.
    #[test]
    fn test_tokenize_strings() {
        let input = b"(Hello World) (Hello\\nWorld) (Nested (paren))";
        let mut tokenizer = ContentTokenizer::new(input);

        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(b"Hello World".to_vec()))
        );
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(b"Hello\nWorld".to_vec()))
        );
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(b"Nested (paren)".to_vec()))
        );
    }
1608
    // Hex strings decode pairs of digits to bytes; embedded whitespace is
    // ignored.
    #[test]
    fn test_tokenize_hex_strings() {
        let input = b"<48656C6C6F> <48 65 6C 6C 6F>";
        let mut tokenizer = ContentTokenizer::new(input);

        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::HexString(b"Hello".to_vec()))
        );
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::HexString(b"Hello".to_vec()))
        );
    }
1623
1624 #[test]
1625 fn test_tokenize_names() {
1626 let input = b"/Name /Name#20with#20spaces /A#42C";
1627 let mut tokenizer = ContentTokenizer::new(input);
1628
1629 assert_eq!(
1630 tokenizer.next_token().unwrap(),
1631 Some(Token::Name("Name".to_string()))
1632 );
1633 assert_eq!(
1634 tokenizer.next_token().unwrap(),
1635 Some(Token::Name("Name with spaces".to_string()))
1636 );
1637 assert_eq!(
1638 tokenizer.next_token().unwrap(),
1639 Some(Token::Name("ABC".to_string()))
1640 );
1641 }
1642
1643 #[test]
1644 fn test_tokenize_operators() {
1645 let input = b"BT Tj ET q Q";
1646 let mut tokenizer = ContentTokenizer::new(input);
1647
1648 assert_eq!(
1649 tokenizer.next_token().unwrap(),
1650 Some(Token::Operator("BT".to_string()))
1651 );
1652 assert_eq!(
1653 tokenizer.next_token().unwrap(),
1654 Some(Token::Operator("Tj".to_string()))
1655 );
1656 assert_eq!(
1657 tokenizer.next_token().unwrap(),
1658 Some(Token::Operator("ET".to_string()))
1659 );
1660 assert_eq!(
1661 tokenizer.next_token().unwrap(),
1662 Some(Token::Operator("q".to_string()))
1663 );
1664 assert_eq!(
1665 tokenizer.next_token().unwrap(),
1666 Some(Token::Operator("Q".to_string()))
1667 );
1668 }
1669
1670 #[test]
1671 fn test_parse_text_operators() {
1672 let content = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
1673 let operators = ContentParser::parse(content).unwrap();
1674
1675 assert_eq!(operators.len(), 5);
1676 assert_eq!(operators[0], ContentOperation::BeginText);
1677 assert_eq!(
1678 operators[1],
1679 ContentOperation::SetFont("F1".to_string(), 12.0)
1680 );
1681 assert_eq!(operators[2], ContentOperation::MoveText(100.0, 200.0));
1682 assert_eq!(
1683 operators[3],
1684 ContentOperation::ShowText(b"Hello World".to_vec())
1685 );
1686 assert_eq!(operators[4], ContentOperation::EndText);
1687 }
1688
1689 #[test]
1690 fn test_parse_graphics_operators() {
1691 let content = b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q";
1692 let operators = ContentParser::parse(content).unwrap();
1693
1694 assert_eq!(operators.len(), 6);
1695 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1696 assert_eq!(
1697 operators[1],
1698 ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1699 );
1700 assert_eq!(operators[2], ContentOperation::SetLineWidth(2.0));
1701 assert_eq!(
1702 operators[3],
1703 ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0)
1704 );
1705 assert_eq!(operators[4], ContentOperation::Stroke);
1706 assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
1707 }
1708
1709 #[test]
1710 fn test_parse_color_operators() {
1711 let content = b"0.5 g 1 0 0 rg 0 0 0 1 k";
1712 let operators = ContentParser::parse(content).unwrap();
1713
1714 assert_eq!(operators.len(), 3);
1715 assert_eq!(operators[0], ContentOperation::SetNonStrokingGray(0.5));
1716 assert_eq!(
1717 operators[1],
1718 ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0)
1719 );
1720 assert_eq!(
1721 operators[2],
1722 ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1723 );
1724 }
1725
1726 mod comprehensive_tests {
1728 use super::*;
1729
1730 #[test]
1731 fn test_all_text_operators() {
1732 let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
1734 let operators = ContentParser::parse(content).unwrap();
1735
1736 assert_eq!(operators[0], ContentOperation::BeginText);
1737 assert_eq!(operators[1], ContentOperation::SetCharSpacing(5.0));
1738 assert_eq!(operators[2], ContentOperation::SetWordSpacing(10.0));
1739 assert_eq!(operators[3], ContentOperation::SetHorizontalScaling(120.0));
1740 assert_eq!(operators[4], ContentOperation::SetLeading(15.0));
1741 assert_eq!(
1742 operators[5],
1743 ContentOperation::SetFont("F1".to_string(), 12.0)
1744 );
1745 assert_eq!(operators[6], ContentOperation::SetTextRenderMode(1));
1746 assert_eq!(operators[7], ContentOperation::SetTextRise(5.0));
1747 assert_eq!(operators[8], ContentOperation::MoveText(100.0, 200.0));
1748 assert_eq!(
1749 operators[9],
1750 ContentOperation::MoveTextSetLeading(50.0, 150.0)
1751 );
1752 assert_eq!(operators[10], ContentOperation::NextLine);
1753 assert_eq!(operators[11], ContentOperation::ShowText(b"Hello".to_vec()));
1754 assert_eq!(operators[12], ContentOperation::EndText);
1755 }
1756
1757 #[test]
1758 fn test_all_graphics_state_operators() {
1759 let content = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
1761 let operators = ContentParser::parse(content).unwrap();
1762
1763 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1764 assert_eq!(operators[1], ContentOperation::RestoreGraphicsState);
1765 assert_eq!(
1766 operators[2],
1767 ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1768 );
1769 assert_eq!(operators[3], ContentOperation::SetLineWidth(2.0));
1770 assert_eq!(operators[4], ContentOperation::SetLineCap(1));
1771 assert_eq!(operators[5], ContentOperation::SetLineJoin(2));
1772 assert_eq!(operators[6], ContentOperation::SetMiterLimit(10.0));
1773 assert_eq!(
1774 operators[7],
1775 ContentOperation::SetGraphicsStateParams("GS1".to_string())
1776 );
1777 assert_eq!(operators[8], ContentOperation::SetFlatness(0.5));
1778 assert_eq!(
1779 operators[9],
1780 ContentOperation::SetIntent("Perceptual".to_string())
1781 );
1782 }
1783
1784 #[test]
1785 fn test_all_path_construction_operators() {
1786 let content = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
1787 let operators = ContentParser::parse(content).unwrap();
1788
1789 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
1790 assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
1791 assert_eq!(
1792 operators[2],
1793 ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0)
1794 );
1795 assert_eq!(
1796 operators[3],
1797 ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0)
1798 );
1799 assert_eq!(
1800 operators[4],
1801 ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0)
1802 );
1803 assert_eq!(operators[5], ContentOperation::ClosePath);
1804 assert_eq!(
1805 operators[6],
1806 ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0)
1807 );
1808 }
1809
1810 #[test]
1811 fn test_all_path_painting_operators() {
1812 let content = b"S s f F f* B B* b b* n W W*";
1813 let operators = ContentParser::parse(content).unwrap();
1814
1815 assert_eq!(operators[0], ContentOperation::Stroke);
1816 assert_eq!(operators[1], ContentOperation::CloseStroke);
1817 assert_eq!(operators[2], ContentOperation::Fill);
1818 assert_eq!(operators[3], ContentOperation::Fill); assert_eq!(operators[4], ContentOperation::FillEvenOdd);
1820 assert_eq!(operators[5], ContentOperation::FillStroke);
1821 assert_eq!(operators[6], ContentOperation::FillStrokeEvenOdd);
1822 assert_eq!(operators[7], ContentOperation::CloseFillStroke);
1823 assert_eq!(operators[8], ContentOperation::CloseFillStrokeEvenOdd);
1824 assert_eq!(operators[9], ContentOperation::EndPath);
1825 assert_eq!(operators[10], ContentOperation::Clip);
1826 assert_eq!(operators[11], ContentOperation::ClipEvenOdd);
1827 }
1828
1829 #[test]
1830 fn test_all_color_operators() {
1831 let content = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
1833 let operators = ContentParser::parse(content).unwrap();
1834
1835 assert_eq!(
1836 operators[0],
1837 ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string())
1838 );
1839 assert_eq!(
1840 operators[1],
1841 ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string())
1842 );
1843 assert_eq!(operators[2], ContentOperation::SetStrokingGray(0.7));
1844 assert_eq!(operators[3], ContentOperation::SetNonStrokingGray(0.4));
1845 assert_eq!(
1846 operators[4],
1847 ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0)
1848 );
1849 assert_eq!(
1850 operators[5],
1851 ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0)
1852 );
1853 assert_eq!(
1854 operators[6],
1855 ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1856 );
1857 assert_eq!(
1858 operators[7],
1859 ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5)
1860 );
1861 assert_eq!(
1862 operators[8],
1863 ContentOperation::ShadingFill("Shade1".to_string())
1864 );
1865 }
1866
1867 #[test]
1868 fn test_xobject_and_marked_content_operators() {
1869 let content = b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX";
1871 let operators = ContentParser::parse(content).unwrap();
1872
1873 assert_eq!(
1874 operators[0],
1875 ContentOperation::PaintXObject("Image1".to_string())
1876 );
1877 assert_eq!(
1878 operators[1],
1879 ContentOperation::BeginMarkedContent("MC1".to_string())
1880 );
1881 assert_eq!(operators[2], ContentOperation::EndMarkedContent);
1882 assert_eq!(
1883 operators[3],
1884 ContentOperation::DefineMarkedContentPoint("MP1".to_string())
1885 );
1886 assert_eq!(operators[4], ContentOperation::BeginCompatibility);
1887 assert_eq!(operators[5], ContentOperation::EndCompatibility);
1888 }
1889
1890 #[test]
1891 fn test_complex_content_stream() {
1892 let content = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
1893 let operators = ContentParser::parse(content).unwrap();
1894
1895 assert_eq!(operators.len(), 8);
1896 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1897 assert_eq!(
1898 operators[1],
1899 ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0)
1900 );
1901 assert_eq!(operators[2], ContentOperation::BeginText);
1902 assert_eq!(
1903 operators[3],
1904 ContentOperation::SetFont("F1".to_string(), 12.0)
1905 );
1906 assert_eq!(operators[4], ContentOperation::MoveText(0.0, 0.0));
1907 assert_eq!(
1908 operators[5],
1909 ContentOperation::ShowText(b"Complex".to_vec())
1910 );
1911 assert_eq!(operators[6], ContentOperation::EndText);
1912 assert_eq!(operators[7], ContentOperation::RestoreGraphicsState);
1913 }
1914
1915 #[test]
1916 fn test_tokenizer_whitespace_handling() {
1917 let input = b" \t\n\r BT \t\n /F1 12.5 \t Tf \n\r ET ";
1918 let mut tokenizer = ContentTokenizer::new(input);
1919
1920 assert_eq!(
1921 tokenizer.next_token().unwrap(),
1922 Some(Token::Operator("BT".to_string()))
1923 );
1924 assert_eq!(
1925 tokenizer.next_token().unwrap(),
1926 Some(Token::Name("F1".to_string()))
1927 );
1928 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(12.5)));
1929 assert_eq!(
1930 tokenizer.next_token().unwrap(),
1931 Some(Token::Operator("Tf".to_string()))
1932 );
1933 assert_eq!(
1934 tokenizer.next_token().unwrap(),
1935 Some(Token::Operator("ET".to_string()))
1936 );
1937 assert_eq!(tokenizer.next_token().unwrap(), None);
1938 }
1939
1940 #[test]
1941 fn test_tokenizer_edge_cases() {
1942 let input = b"0 .5 -.5 +.5 123. .123 1.23 -1.23";
1944 let mut tokenizer = ContentTokenizer::new(input);
1945
1946 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(0)));
1947 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1948 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1949 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1950 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(123.0)));
1951 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.123)));
1952 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(1.23)));
1953 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-1.23)));
1954 }
1955
        #[test]
        fn test_string_parsing_edge_cases() {
            // Exercises literal-string escapes: \\, \), \n, \t, \r, \b, \f, \( .
            // NOTE(review): the input ends with two more strings,
            // (With\377octal) and (With\dddoctal), that are tokenized but never
            // asserted — add expectations for them or drop them from the input.
            let input = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
            let mut tokenizer = ContentTokenizer::new(input);

            assert_eq!(
                tokenizer.next_token().unwrap(),
                Some(Token::String(b"Simple".to_vec()))
            );
            assert_eq!(
                tokenizer.next_token().unwrap(),
                Some(Token::String(b"With\\backslash".to_vec()))
            );
            assert_eq!(
                tokenizer.next_token().unwrap(),
                Some(Token::String(b"With)paren".to_vec()))
            );
            assert_eq!(
                tokenizer.next_token().unwrap(),
                Some(Token::String(b"With\newline".to_vec()))
            );
            assert_eq!(
                tokenizer.next_token().unwrap(),
                Some(Token::String(b"With\ttab".to_vec()))
            );
            assert_eq!(
                tokenizer.next_token().unwrap(),
                Some(Token::String(b"With\rcarriage".to_vec()))
            );
            assert_eq!(
                tokenizer.next_token().unwrap(),
                Some(Token::String(b"With\x08backspace".to_vec()))
            );
            assert_eq!(
                tokenizer.next_token().unwrap(),
                Some(Token::String(b"With\x0Cformfeed".to_vec()))
            );
            assert_eq!(
                tokenizer.next_token().unwrap(),
                Some(Token::String(b"With(leftparen".to_vec()))
            );
            assert_eq!(
                tokenizer.next_token().unwrap(),
                Some(Token::String(b"With)rightparen".to_vec()))
            );
        }
2002
2003 #[test]
2004 fn test_hex_string_parsing() {
2005 let input = b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>";
2006 let mut tokenizer = ContentTokenizer::new(input);
2007
2008 assert_eq!(
2009 tokenizer.next_token().unwrap(),
2010 Some(Token::HexString(b"Hello".to_vec()))
2011 );
2012 assert_eq!(
2013 tokenizer.next_token().unwrap(),
2014 Some(Token::HexString(b"Hello".to_vec()))
2015 );
2016 assert_eq!(
2017 tokenizer.next_token().unwrap(),
2018 Some(Token::HexString(b"HelloW".to_vec()))
2019 );
2020 assert_eq!(
2021 tokenizer.next_token().unwrap(),
2022 Some(Token::HexString(b"Hello\x50".to_vec()))
2023 );
2024 }
2025
2026 #[test]
2027 fn test_name_parsing_edge_cases() {
2028 let input = b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
2029 let mut tokenizer = ContentTokenizer::new(input);
2030
2031 assert_eq!(
2032 tokenizer.next_token().unwrap(),
2033 Some(Token::Name("Name".to_string()))
2034 );
2035 assert_eq!(
2036 tokenizer.next_token().unwrap(),
2037 Some(Token::Name("Name with spaces".to_string()))
2038 );
2039 assert_eq!(
2040 tokenizer.next_token().unwrap(),
2041 Some(Token::Name("Name#with#hash".to_string()))
2042 );
2043 assert_eq!(
2044 tokenizer.next_token().unwrap(),
2045 Some(Token::Name("Name/with/slash".to_string()))
2046 );
2047 assert_eq!(
2048 tokenizer.next_token().unwrap(),
2049 Some(Token::Name("EmptyName".to_string()))
2050 );
2051 }
2052
2053 #[test]
2054 fn test_operator_parsing_edge_cases() {
2055 let content = b"q q q Q Q Q BT BT ET ET";
2056 let operators = ContentParser::parse(content).unwrap();
2057
2058 assert_eq!(operators.len(), 10);
2059 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
2060 assert_eq!(operators[1], ContentOperation::SaveGraphicsState);
2061 assert_eq!(operators[2], ContentOperation::SaveGraphicsState);
2062 assert_eq!(operators[3], ContentOperation::RestoreGraphicsState);
2063 assert_eq!(operators[4], ContentOperation::RestoreGraphicsState);
2064 assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
2065 assert_eq!(operators[6], ContentOperation::BeginText);
2066 assert_eq!(operators[7], ContentOperation::BeginText);
2067 assert_eq!(operators[8], ContentOperation::EndText);
2068 assert_eq!(operators[9], ContentOperation::EndText);
2069 }
2070
2071 #[test]
2072 fn test_error_handling_insufficient_operands() {
2073 let content = b"100 Td"; let result = ContentParser::parse(content);
2075 assert!(result.is_err());
2076 }
2077
2078 #[test]
2079 fn test_error_handling_invalid_operator() {
2080 let content = b"100 200 INVALID";
2081 let result = ContentParser::parse(content);
2082 assert!(result.is_err());
2083 }
2084
2085 #[test]
2086 fn test_error_handling_malformed_string() {
2087 let input = b"(Unclosed string";
2089 let mut tokenizer = ContentTokenizer::new(input);
2090 let result = tokenizer.next_token();
2091 assert!(result.is_ok() || result.is_err());
2094 }
2095
2096 #[test]
2097 fn test_error_handling_malformed_hex_string() {
2098 let input = b"<48656C6C6G>";
2099 let mut tokenizer = ContentTokenizer::new(input);
2100 let result = tokenizer.next_token();
2101 assert!(result.is_err());
2102 }
2103
2104 #[test]
2105 fn test_error_handling_malformed_name() {
2106 let input = b"/Name#GG";
2107 let mut tokenizer = ContentTokenizer::new(input);
2108 let result = tokenizer.next_token();
2109 assert!(result.is_err());
2110 }
2111
2112 #[test]
2113 fn test_empty_content_stream() {
2114 let content = b"";
2115 let operators = ContentParser::parse(content).unwrap();
2116 assert_eq!(operators.len(), 0);
2117 }
2118
2119 #[test]
2120 fn test_whitespace_only_content_stream() {
2121 let content = b" \t\n\r ";
2122 let operators = ContentParser::parse(content).unwrap();
2123 assert_eq!(operators.len(), 0);
2124 }
2125
2126 #[test]
2127 fn test_mixed_integer_and_real_operands() {
2128 let content = b"100 200 m 150 200 l";
2130 let operators = ContentParser::parse(content).unwrap();
2131
2132 assert_eq!(operators.len(), 2);
2133 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2134 assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
2135 }
2136
2137 #[test]
2138 fn test_negative_operands() {
2139 let content = b"-100 -200 Td -50.5 -75.2 TD";
2140 let operators = ContentParser::parse(content).unwrap();
2141
2142 assert_eq!(operators.len(), 2);
2143 assert_eq!(operators[0], ContentOperation::MoveText(-100.0, -200.0));
2144 assert_eq!(
2145 operators[1],
2146 ContentOperation::MoveTextSetLeading(-50.5, -75.2)
2147 );
2148 }
2149
2150 #[test]
2151 fn test_large_numbers() {
2152 let content = b"999999.999999 -999999.999999 m";
2153 let operators = ContentParser::parse(content).unwrap();
2154
2155 assert_eq!(operators.len(), 1);
2156 assert_eq!(
2157 operators[0],
2158 ContentOperation::MoveTo(999999.999999, -999999.999999)
2159 );
2160 }
2161
        #[test]
        fn test_scientific_notation() {
            // NOTE(review): despite the name, this input contains only plain
            // decimal numbers — no `e`/`E` exponent forms are exercised here.
            // Rename the test or add exponent inputs if the tokenizer is meant
            // to reject/accept them explicitly.
            let content = b"123.45 -456.78 m";
            let operators = ContentParser::parse(content).unwrap();

            assert_eq!(operators.len(), 1);
            assert_eq!(operators[0], ContentOperation::MoveTo(123.45, -456.78));
        }
2171
2172 #[test]
2173 fn test_show_text_array_complex() {
2174 let content = b"(Hello) TJ";
2176 let result = ContentParser::parse(content);
2177 assert!(result.is_err());
2179 }
2180
2181 #[test]
2182 fn test_dash_pattern_empty() {
2183 let content = b"0 d";
2185 let result = ContentParser::parse(content);
2186 assert!(result.is_err());
2188 }
2189
2190 #[test]
2191 fn test_dash_pattern_complex() {
2192 let content = b"2.5 d";
2194 let result = ContentParser::parse(content);
2195 assert!(result.is_err());
2197 }
2198
        #[test]
        fn test_pop_array_removes_array_end() {
            let parser = ContentParser::new(b"");

            // A complete [ 1 2 3 ] sequence: pop_array returns the three
            // elements and consumes the delimiters from the operand stack.
            let mut operands = vec![
                Token::ArrayStart,
                Token::Integer(1),
                Token::Integer(2),
                Token::Integer(3),
                Token::ArrayEnd,
            ];
            let result = parser.pop_array(&mut operands).unwrap();
            assert_eq!(result.len(), 3);
            assert!(operands.is_empty());

            // A missing ArrayEnd is tolerated here: the call still succeeds
            // and drains the stack.
            let mut operands = vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)];
            let result = parser.pop_array(&mut operands).unwrap();
            assert_eq!(result.len(), 2);
            assert!(operands.is_empty());
        }
2222
        #[test]
        fn test_dash_array_parsing_valid() {
            let parser = ContentParser::new(b"");

            // Mixed Number/Integer tokens are all converted to f32.
            let valid_tokens = vec![Token::Number(3.0), Token::Integer(2)];
            let result = parser.parse_dash_array(valid_tokens).unwrap();
            assert_eq!(result, vec![3.0, 2.0]);

            // An empty dash array (solid line) is valid and yields no entries.
            let empty_tokens = vec![];
            let result = parser.parse_dash_array(empty_tokens).unwrap();
            let expected: Vec<f32> = vec![];
            assert_eq!(result, expected);
        }
2239
        #[test]
        fn test_text_array_parsing_valid() {
            let parser = ContentParser::new(b"");

            // A TJ array alternating strings and kerning adjustments: each
            // token becomes one TextElement.
            let valid_tokens = vec![
                Token::String(b"Hello".to_vec()),
                Token::Number(-100.0),
                Token::String(b"World".to_vec()),
            ];
            let result = parser.parse_text_array(valid_tokens).unwrap();
            assert_eq!(result.len(), 3);
        }
2254
        #[test]
        fn test_inline_image_handling() {
            // BI ... ID ... EI: abbreviated keys (/W, /H, /BPC, /CS) are
            // expanded to full names, and the /RGB colour-space abbreviation
            // to DeviceRGB (see the name-expansion table in this module).
            let content = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
            let operators = ContentParser::parse(content).unwrap();

            assert_eq!(operators.len(), 1);
            match &operators[0] {
                ContentOperation::InlineImage { params, data: _ } => {
                    assert_eq!(params.get("Width"), Some(&Object::Integer(100)));
                    assert_eq!(params.get("Height"), Some(&Object::Integer(100)));
                    assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(8)));
                    assert_eq!(
                        params.get("ColorSpace"),
                        Some(&Object::Name("DeviceRGB".to_string()))
                    );
                }
                _ => panic!("Expected InlineImage operation"),
            }
        }
2276
        #[test]
        fn test_inline_image_with_filter() {
            // Filter abbreviation /AHx expands to ASCIIHexDecode and colour
            // space /G to DeviceGray (name-expansion table in this module).
            let content = b"BI /W 50 /H 50 /CS /G /BPC 1 /F /AHx ID 00FF00FF EI";
            let operators = ContentParser::parse(content).unwrap();

            assert_eq!(operators.len(), 1);
            match &operators[0] {
                ContentOperation::InlineImage { params, data: _ } => {
                    assert_eq!(params.get("Width"), Some(&Object::Integer(50)));
                    assert_eq!(params.get("Height"), Some(&Object::Integer(50)));
                    assert_eq!(
                        params.get("ColorSpace"),
                        Some(&Object::Name("DeviceGray".to_string()))
                    );
                    assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(1)));
                    assert_eq!(
                        params.get("Filter"),
                        Some(&Object::Name("ASCIIHexDecode".to_string()))
                    );
                }
                _ => panic!("Expected InlineImage operation"),
            }
        }
2300
        #[test]
        fn test_content_parser_performance() {
            // Parse 1000 MoveTo operations and sanity-check throughput.
            let mut content = Vec::new();
            for i in 0..1000 {
                content.extend_from_slice(format!("{} {} m ", i, i + 1).as_bytes());
            }

            let start = std::time::Instant::now();
            let operators = ContentParser::parse(&content).unwrap();
            let duration = start.elapsed();

            assert_eq!(operators.len(), 1000);
            // NOTE(review): wall-clock bounds are flaky on loaded CI machines;
            // consider loosening or removing this assertion.
            assert!(duration.as_millis() < 100); }
2315
        #[test]
        fn test_tokenizer_performance() {
            // Tokenize 2000 integers and sanity-check throughput.
            let mut input = Vec::new();
            for i in 0..1000 {
                input.extend_from_slice(format!("{} {} ", i, i + 1).as_bytes());
            }

            let start = std::time::Instant::now();
            let mut tokenizer = ContentTokenizer::new(&input);
            let mut count = 0;
            while tokenizer.next_token().unwrap().is_some() {
                count += 1;
            }
            let duration = start.elapsed();

            // NOTE(review): the wall-clock bound can be flaky on loaded CI hosts.
            assert_eq!(count, 2000); assert!(duration.as_millis() < 50); }
2334
2335 #[test]
2336 fn test_memory_usage_large_content() {
2337 let mut content = Vec::new();
2338 for i in 0..10000 {
2339 content.extend_from_slice(
2340 format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5)
2341 .as_bytes(),
2342 );
2343 }
2344
2345 let operators = ContentParser::parse(&content).unwrap();
2346 assert_eq!(operators.len(), 10000);
2347
2348 for op in operators {
2350 matches!(op, ContentOperation::CurveTo(_, _, _, _, _, _));
2351 }
2352 }
2353
        #[test]
        fn test_concurrent_parsing() {
            use std::sync::Arc;
            use std::thread;

            // Ten threads parse the same shared immutable buffer; every
            // thread must produce the same five operations.
            let content = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());
            let handles: Vec<_> = (0..10)
                .map(|_| {
                    let content_clone = content.clone();
                    thread::spawn(move || ContentParser::parse(&content_clone).unwrap())
                })
                .collect();

            for handle in handles {
                let operators = handle.join().unwrap();
                assert_eq!(operators.len(), 5);
                assert_eq!(operators[0], ContentOperation::BeginText);
                assert_eq!(operators[4], ContentOperation::EndText);
            }
        }
2374
2375 #[test]
2378 fn test_tokenizer_hex_string_edge_cases() {
2379 let mut tokenizer = ContentTokenizer::new(b"<>");
2380 let token = tokenizer.next_token().unwrap().unwrap();
2381 match token {
2382 Token::HexString(data) => assert!(data.is_empty()),
2383 _ => panic!("Expected empty hex string"),
2384 }
2385
2386 let mut tokenizer = ContentTokenizer::new(b"<123>");
2388 let token = tokenizer.next_token().unwrap().unwrap();
2389 match token {
2390 Token::HexString(data) => assert_eq!(data, vec![0x12, 0x30]),
2391 _ => panic!("Expected hex string with odd digits"),
2392 }
2393
2394 let mut tokenizer = ContentTokenizer::new(b"<12 34\t56\n78>");
2396 let token = tokenizer.next_token().unwrap().unwrap();
2397 match token {
2398 Token::HexString(data) => assert_eq!(data, vec![0x12, 0x34, 0x56, 0x78]),
2399 _ => panic!("Expected hex string with whitespace"),
2400 }
2401 }
2402
2403 #[test]
2404 fn test_tokenizer_literal_string_escape_sequences() {
2405 let mut tokenizer = ContentTokenizer::new(b"(\\n\\r\\t\\b\\f\\(\\)\\\\)");
2407 let token = tokenizer.next_token().unwrap().unwrap();
2408 match token {
2409 Token::String(data) => {
2410 assert_eq!(
2411 data,
2412 vec![b'\n', b'\r', b'\t', 0x08, 0x0C, b'(', b')', b'\\']
2413 );
2414 }
2415 _ => panic!("Expected string with escapes"),
2416 }
2417
2418 let mut tokenizer = ContentTokenizer::new(b"(\\101\\040\\377)");
2420 let token = tokenizer.next_token().unwrap().unwrap();
2421 match token {
2422 Token::String(data) => assert_eq!(data, vec![b'A', b' ', 255]),
2423 _ => panic!("Expected string with octal escapes"),
2424 }
2425 }
2426
2427 #[test]
2428 fn test_tokenizer_nested_parentheses() {
2429 let mut tokenizer = ContentTokenizer::new(b"(outer (inner) text)");
2430 let token = tokenizer.next_token().unwrap().unwrap();
2431 match token {
2432 Token::String(data) => {
2433 assert_eq!(data, b"outer (inner) text");
2434 }
2435 _ => panic!("Expected string with nested parentheses"),
2436 }
2437
2438 let mut tokenizer = ContentTokenizer::new(b"(level1 (level2 (level3) back2) back1)");
2440 let token = tokenizer.next_token().unwrap().unwrap();
2441 match token {
2442 Token::String(data) => {
2443 assert_eq!(data, b"level1 (level2 (level3) back2) back1");
2444 }
2445 _ => panic!("Expected string with deep nesting"),
2446 }
2447 }
2448
2449 #[test]
2450 fn test_tokenizer_name_hex_escapes() {
2451 let mut tokenizer = ContentTokenizer::new(b"/Name#20With#20Spaces");
2452 let token = tokenizer.next_token().unwrap().unwrap();
2453 match token {
2454 Token::Name(name) => assert_eq!(name, "Name With Spaces"),
2455 _ => panic!("Expected name with hex escapes"),
2456 }
2457
2458 let mut tokenizer = ContentTokenizer::new(b"/Special#2F#28#29#3C#3E");
2460 let token = tokenizer.next_token().unwrap().unwrap();
2461 match token {
2462 Token::Name(name) => assert_eq!(name, "Special/()<>"),
2463 _ => panic!("Expected name with special character escapes"),
2464 }
2465 }
2466
2467 #[test]
2468 fn test_tokenizer_number_edge_cases() {
2469 let mut tokenizer = ContentTokenizer::new(b"2147483647");
2471 let token = tokenizer.next_token().unwrap().unwrap();
2472 match token {
2473 Token::Integer(n) => assert_eq!(n, 2147483647),
2474 _ => panic!("Expected large integer"),
2475 }
2476
2477 let mut tokenizer = ContentTokenizer::new(b"0.00001");
2479 let token = tokenizer.next_token().unwrap().unwrap();
2480 match token {
2481 Token::Number(n) => assert!((n - 0.00001).abs() < f32::EPSILON),
2482 _ => panic!("Expected small float"),
2483 }
2484
2485 let mut tokenizer = ContentTokenizer::new(b".5");
2487 let token = tokenizer.next_token().unwrap().unwrap();
2488 match token {
2489 Token::Number(n) => assert!((n - 0.5).abs() < f32::EPSILON),
2490 _ => panic!("Expected float starting with dot"),
2491 }
2492 }
2493
2494 #[test]
2495 fn test_parser_complex_path_operations() {
2496 let content = b"100 200 m 150 200 l 150 250 l 100 250 l h f";
2497 let operators = ContentParser::parse(content).unwrap();
2498
2499 assert_eq!(operators.len(), 6);
2500 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2501 assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
2502 assert_eq!(operators[2], ContentOperation::LineTo(150.0, 250.0));
2503 assert_eq!(operators[3], ContentOperation::LineTo(100.0, 250.0));
2504 assert_eq!(operators[4], ContentOperation::ClosePath);
2505 assert_eq!(operators[5], ContentOperation::Fill);
2506 }
2507
2508 #[test]
2509 fn test_parser_bezier_curves() {
2510 let content = b"100 100 150 50 200 150 c";
2511 let operators = ContentParser::parse(content).unwrap();
2512
2513 assert_eq!(operators.len(), 1);
2514 match &operators[0] {
2515 ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3) => {
2516 assert!(x1.is_finite() && y1.is_finite());
2520 assert!(x2.is_finite() && y2.is_finite());
2521 assert!(x3.is_finite() && y3.is_finite());
2522 assert!(*x1 >= 50.0 && *x1 <= 200.0);
2524 assert!(*y1 >= 50.0 && *y1 <= 200.0);
2525 }
2526 _ => panic!("Expected CurveTo operation"),
2527 }
2528 }
2529
        #[test]
        fn test_parser_color_operations() {
            // g/rg/k set non-stroking colour; cs selects a colour space and
            // sc supplies component values for it.
            // NOTE(review): only the first two of the five operations are
            // asserted — consider pinning the k/cs/sc results as well.
            let content = b"0.5 g 1 0 0 rg 0 1 0 1 k /DeviceRGB cs 0.2 0.4 0.6 sc";
            let operators = ContentParser::parse(content).unwrap();

            assert_eq!(operators.len(), 5);
            match &operators[0] {
                ContentOperation::SetNonStrokingGray(gray) => assert_eq!(*gray, 0.5),
                _ => panic!("Expected SetNonStrokingGray"),
            }
            match &operators[1] {
                ContentOperation::SetNonStrokingRGB(r, g, b) => {
                    assert_eq!((*r, *g, *b), (1.0, 0.0, 0.0));
                }
                _ => panic!("Expected SetNonStrokingRGB"),
            }
        }
2547
2548 #[test]
2549 fn test_parser_text_positioning_advanced() {
2550 let content = b"BT 1 0 0 1 100 200 Tm 0 TL 10 TL (Line 1) ' (Line 2) ' ET";
2551 let operators = ContentParser::parse(content).unwrap();
2552
2553 assert_eq!(operators.len(), 7);
2554 assert_eq!(operators[0], ContentOperation::BeginText);
2555 match &operators[1] {
2556 ContentOperation::SetTextMatrix(a, b, c, d, e, f) => {
2557 assert_eq!((*a, *b, *c, *d, *e, *f), (1.0, 0.0, 0.0, 1.0, 100.0, 200.0));
2558 }
2559 _ => panic!("Expected SetTextMatrix"),
2560 }
2561 assert_eq!(operators[6], ContentOperation::EndText);
2562 }
2563
2564 #[test]
2565 fn test_parser_graphics_state_operations() {
2566 let content = b"q 2 0 0 2 100 100 cm 5 w 1 J 2 j 10 M Q";
2567 let operators = ContentParser::parse(content).unwrap();
2568
2569 assert_eq!(operators.len(), 7);
2570 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
2571 match &operators[1] {
2572 ContentOperation::SetTransformMatrix(a, b, c, d, e, f) => {
2573 assert_eq!((*a, *b, *c, *d, *e, *f), (2.0, 0.0, 0.0, 2.0, 100.0, 100.0));
2574 }
2575 _ => panic!("Expected SetTransformMatrix"),
2576 }
2577 assert_eq!(operators[6], ContentOperation::RestoreGraphicsState);
2578 }
2579
2580 #[test]
2581 fn test_parser_xobject_operations() {
2582 let content = b"/Image1 Do /Form2 Do /Pattern3 Do";
2583 let operators = ContentParser::parse(content).unwrap();
2584
2585 assert_eq!(operators.len(), 3);
2586 for (i, expected_name) in ["Image1", "Form2", "Pattern3"].iter().enumerate() {
2587 match &operators[i] {
2588 ContentOperation::PaintXObject(name) => assert_eq!(name, expected_name),
2589 _ => panic!("Expected PaintXObject"),
2590 }
2591 }
2592 }
2593
2594 #[test]
2595 fn test_parser_marked_content_operations() {
2596 let content = b"/P BMC (Tagged content) Tj EMC";
2597 let operators = ContentParser::parse(content).unwrap();
2598
2599 assert_eq!(operators.len(), 3);
2600 match &operators[0] {
2601 ContentOperation::BeginMarkedContent(tag) => assert_eq!(tag, "P"),
2602 _ => panic!("Expected BeginMarkedContent"),
2603 }
2604 assert_eq!(operators[2], ContentOperation::EndMarkedContent);
2605 }
2606
        #[test]
        fn test_parser_error_handling_invalid_operators() {
            // A path operator with no operands must fail.
            let content = b"m";
            let result = ContentParser::parse(content);
            assert!(result.is_err());

            // An unterminated hex string before an operator must fail.
            let content = b"<ABC DEF BT";
            let result = ContentParser::parse(content);
            assert!(result.is_err());

            // Trailing operands with no operator are accepted (discarded).
            let content = b"100 200 300"; let result = ContentParser::parse(content);
            assert!(result.is_ok()); }
2624
2625 #[test]
2626 fn test_parser_whitespace_tolerance() {
2627 let content = b" \n\t 100 \r\n 200 \t m \n";
2628 let operators = ContentParser::parse(content).unwrap();
2629
2630 assert_eq!(operators.len(), 1);
2631 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2632 }
2633
2634 #[test]
2635 fn test_tokenizer_comment_handling() {
2636 let content = b"100 % This is a comment\n200 m % Another comment";
2637 let operators = ContentParser::parse(content).unwrap();
2638
2639 assert_eq!(operators.len(), 1);
2640 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2641 }
2642
2643 #[test]
2644 fn test_parser_stream_with_binary_data() {
2645 let content = b"100 200 m % Comment with \xFF binary\n150 250 l";
2647
2648 let operators = ContentParser::parse(content).unwrap();
2649 assert_eq!(operators.len(), 2);
2650 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2651 assert_eq!(operators[1], ContentOperation::LineTo(150.0, 250.0));
2652 }
2653
2654 #[test]
2655 fn test_tokenizer_array_parsing() {
2656 let content = b"100 200 m 150 250 l";
2658 let operators = ContentParser::parse(content).unwrap();
2659
2660 assert_eq!(operators.len(), 2);
2661 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2662 assert_eq!(operators[1], ContentOperation::LineTo(150.0, 250.0));
2663 }
2664
2665 #[test]
2666 fn test_parser_rectangle_operations() {
2667 let content = b"10 20 100 50 re 0 0 200 300 re";
2668 let operators = ContentParser::parse(content).unwrap();
2669
2670 assert_eq!(operators.len(), 2);
2671 match &operators[0] {
2672 ContentOperation::Rectangle(x, y, width, height) => {
2673 assert_eq!((*x, *y, *width, *height), (10.0, 20.0, 100.0, 50.0));
2674 }
2675 _ => panic!("Expected Rectangle operation"),
2676 }
2677 match &operators[1] {
2678 ContentOperation::Rectangle(x, y, width, height) => {
2679 assert_eq!((*x, *y, *width, *height), (0.0, 0.0, 200.0, 300.0));
2680 }
2681 _ => panic!("Expected Rectangle operation"),
2682 }
2683 }
2684
2685 #[test]
2686 fn test_parser_clipping_operations() {
2687 let content = b"100 100 50 50 re W n 200 200 75 75 re W* n";
2688 let operators = ContentParser::parse(content).unwrap();
2689
2690 assert_eq!(operators.len(), 6);
2691 assert_eq!(operators[1], ContentOperation::Clip);
2692 assert_eq!(operators[2], ContentOperation::EndPath);
2693 assert_eq!(operators[4], ContentOperation::ClipEvenOdd);
2694 assert_eq!(operators[5], ContentOperation::EndPath);
2695 }
2696
2697 #[test]
2698 fn test_parser_painting_operations() {
2699 let content = b"S s f f* B B* b b*";
2700 let operators = ContentParser::parse(content).unwrap();
2701
2702 assert_eq!(operators.len(), 8);
2703 assert_eq!(operators[0], ContentOperation::Stroke);
2704 assert_eq!(operators[1], ContentOperation::CloseStroke);
2705 assert_eq!(operators[2], ContentOperation::Fill);
2706 assert_eq!(operators[3], ContentOperation::FillEvenOdd);
2707 assert_eq!(operators[4], ContentOperation::FillStroke);
2708 assert_eq!(operators[5], ContentOperation::FillStrokeEvenOdd);
2709 assert_eq!(operators[6], ContentOperation::CloseFillStroke);
2710 assert_eq!(operators[7], ContentOperation::CloseFillStrokeEvenOdd);
2711 }
2712
2713 #[test]
2714 fn test_parser_line_style_operations() {
2715 let content = b"5 w 1 J 2 j 10 M [ 3 2 ] 0 d";
2716 let operators = ContentParser::parse(content).unwrap();
2717
2718 assert_eq!(operators.len(), 5);
2719 assert_eq!(operators[0], ContentOperation::SetLineWidth(5.0));
2720 assert_eq!(operators[1], ContentOperation::SetLineCap(1));
2721 assert_eq!(operators[2], ContentOperation::SetLineJoin(2));
2722 assert_eq!(operators[3], ContentOperation::SetMiterLimit(10.0));
2723 }
2725
2726 #[test]
2727 fn test_parser_text_state_operations() {
2728 let content = b"12 Tc 3 Tw 100 Tz 1 Tr 2 Ts";
2729 let operators = ContentParser::parse(content).unwrap();
2730
2731 assert_eq!(operators.len(), 5);
2732 assert_eq!(operators[0], ContentOperation::SetCharSpacing(12.0));
2733 assert_eq!(operators[1], ContentOperation::SetWordSpacing(3.0));
2734 assert_eq!(operators[2], ContentOperation::SetHorizontalScaling(100.0));
2735 assert_eq!(operators[3], ContentOperation::SetTextRenderMode(1));
2736 assert_eq!(operators[4], ContentOperation::SetTextRise(2.0));
2737 }
2738
2739 #[test]
2740 fn test_parser_unicode_text() {
2741 let content = b"BT (Hello \xC2\xA9 World \xE2\x9C\x93) Tj ET";
2742 let operators = ContentParser::parse(content).unwrap();
2743
2744 assert_eq!(operators.len(), 3);
2745 assert_eq!(operators[0], ContentOperation::BeginText);
2746 match &operators[1] {
2747 ContentOperation::ShowText(text) => {
2748 assert!(text.len() > 5); }
2750 _ => panic!("Expected ShowText operation"),
2751 }
2752 assert_eq!(operators[2], ContentOperation::EndText);
2753 }
2754
2755 #[test]
2756 fn test_parser_stress_test_large_coordinates() {
2757 let content = b"999999.999 -999999.999 999999.999 -999999.999 999999.999 -999999.999 c";
2758 let operators = ContentParser::parse(content).unwrap();
2759
2760 assert_eq!(operators.len(), 1);
2761 match &operators[0] {
2762 ContentOperation::CurveTo(_x1, _y1, _x2, _y2, _x3, _y3) => {
2763 assert!((*_x1 - 999999.999).abs() < 0.1);
2764 assert!((*_y1 - (-999999.999)).abs() < 0.1);
2765 assert!((*_x3 - 999999.999).abs() < 0.1);
2766 }
2767 _ => panic!("Expected CurveTo operation"),
2768 }
2769 }
2770
2771 #[test]
2772 fn test_parser_empty_content_stream() {
2773 let content = b"";
2774 let operators = ContentParser::parse(content).unwrap();
2775 assert!(operators.is_empty());
2776
2777 let content = b" \n\t\r ";
2778 let operators = ContentParser::parse(content).unwrap();
2779 assert!(operators.is_empty());
2780 }
2781
2782 #[test]
2783 fn test_tokenizer_error_recovery() {
2784 let content = b"100 200 m % Comment with\xFFbinary\n150 250 l";
2786 let result = ContentParser::parse(content);
2787 assert!(result.is_ok() || result.is_err());
2789 }
2790
2791 #[test]
2792 fn test_parser_optimization_repeated_operations() {
2793 let mut content = Vec::new();
2795 for i in 0..1000 {
2796 content.extend_from_slice(format!("{} {} m ", i, i * 2).as_bytes());
2797 }
2798
2799 let start = std::time::Instant::now();
2800 let operators = ContentParser::parse(&content).unwrap();
2801 let duration = start.elapsed();
2802
2803 assert_eq!(operators.len(), 1000);
2804 assert!(duration.as_millis() < 200); }
2806
2807 #[test]
2808 fn test_parser_memory_efficiency_large_strings() {
2809 let large_text = "A".repeat(10000);
2811 let content = format!("BT ({}) Tj ET", large_text);
2812 let operators = ContentParser::parse(content.as_bytes()).unwrap();
2813
2814 assert_eq!(operators.len(), 3);
2815 match &operators[1] {
2816 ContentOperation::ShowText(text) => {
2817 assert_eq!(text.len(), 10000);
2818 }
2819 _ => panic!("Expected ShowText operation"),
2820 }
2821 }
2822 }
2823
2824 #[test]
2825 fn test_content_stream_too_large() {
2826 let mut large_content = Vec::new();
2828
2829 for i in 0..10000 {
2831 large_content.extend_from_slice(format!("{} {} m ", i, i).as_bytes());
2832 }
2833 large_content.extend_from_slice(b"S");
2834
2835 let result = ContentParser::parse_content(&large_content);
2837 assert!(result.is_ok());
2838
2839 let operations = result.unwrap();
2840 assert!(operations.len() > 10000);
2842 }
2843
2844 #[test]
2845 fn test_invalid_operator_handling() {
2846 let content = b"100 200 INVALID_OP 300 400 m";
2848 let result = ContentParser::parse_content(content);
2849
2850 if let Ok(operations) = result {
2852 assert!(operations
2854 .iter()
2855 .any(|op| matches!(op, ContentOperation::MoveTo(_, _))));
2856 }
2857 }
2858
2859 #[test]
2860 fn test_nested_arrays_malformed() {
2861 let content = b"[[(Hello] [World)]] TJ";
2863 let result = ContentParser::parse_content(content);
2864
2865 assert!(result.is_ok() || result.is_err());
2867 }
2868
2869 #[test]
2870 fn test_escape_sequences_in_strings() {
2871 let test_cases = vec![
2873 (b"(\\n\\r\\t)".as_slice(), b"\n\r\t".as_slice()),
2874 (b"(\\\\)".as_slice(), b"\\".as_slice()),
2875 (b"(\\(\\))".as_slice(), b"()".as_slice()),
2876 (b"(\\123)".as_slice(), b"S".as_slice()), (b"(\\0)".as_slice(), b"\0".as_slice()),
2878 ];
2879
2880 for (input, expected) in test_cases {
2881 let mut content = Vec::new();
2882 content.extend_from_slice(input);
2883 content.extend_from_slice(b" Tj");
2884
2885 let result = ContentParser::parse_content(&content);
2886 assert!(result.is_ok());
2887
2888 let operations = result.unwrap();
2889 if let ContentOperation::ShowText(text) = &operations[0] {
2890 assert_eq!(text, expected, "Failed for input: {:?}", input);
2891 } else {
2892 panic!("Expected ShowText operation");
2893 }
2894 }
2895 }
2896
2897 #[test]
2898 fn test_content_with_inline_images() {
2899 let content = b"BI /W 10 /H 10 /CS /RGB ID \x00\x01\x02\x03 EI";
2901 let result = ContentParser::parse_content(content);
2902
2903 assert!(result.is_ok() || result.is_err());
2905 }
2906
2907 #[test]
2908 fn test_operator_with_missing_operands() {
2909 let test_cases = vec![
2911 b"Tj" as &[u8], b"m", b"rg", b"Tf", ];
2916
2917 for content in test_cases {
2918 let result = ContentParser::parse_content(content);
2919 assert!(result.is_ok() || result.is_err());
2921 }
2922 }
2923
2924 #[test]
2927 fn test_tokenizer_handles_curly_braces() {
2928 let input = b"q { } Q";
2931 let mut tokenizer = ContentTokenizer::new(input);
2932
2933 let mut tokens = Vec::new();
2934 while let Some(token) = tokenizer.next_token().unwrap() {
2935 tokens.push(token);
2936 }
2937
2938 assert!(tokens.contains(&Token::Operator("q".to_string())));
2940 assert!(tokens.contains(&Token::Operator("Q".to_string())));
2941 }
2942
2943 #[test]
2944 fn test_tokenizer_handles_closing_paren() {
2945 let input = b"q ) Q";
2947 let mut tokenizer = ContentTokenizer::new(input);
2948
2949 let mut tokens = Vec::new();
2950 while let Some(token) = tokenizer.next_token().unwrap() {
2951 tokens.push(token);
2952 }
2953
2954 assert!(tokens.contains(&Token::Operator("q".to_string())));
2955 assert!(tokens.contains(&Token::Operator("Q".to_string())));
2956 }
2957
2958 #[test]
2959 fn test_inline_image_binary_with_curly_braces() {
2960 let content = b"BI /W 2 /H 2 /BPC 8 /CS /G ID \x7B\x7D\x00\xFF EI Q";
2963 let result = ContentParser::parse_content(content);
2964 assert!(
2965 result.is_ok(),
2966 "Parsing inline image with curly braces failed: {:?}",
2967 result.err()
2968 );
2969
2970 let ops = result.unwrap();
2971 let has_inline = ops
2973 .iter()
2974 .any(|op| matches!(op, ContentOperation::InlineImage { .. }));
2975 let has_q = ops
2976 .iter()
2977 .any(|op| matches!(op, ContentOperation::RestoreGraphicsState));
2978 assert!(has_inline, "Expected InlineImage operation");
2979 assert!(has_q, "Expected RestoreGraphicsState after EI");
2980 }
2981
2982 #[test]
2983 fn test_inline_image_binary_with_all_byte_values() {
2984 let mut content = Vec::new();
2986 content.extend_from_slice(b"BI /W 16 /H 16 /BPC 8 /CS /G ID ");
2987 for b in 0u8..=255 {
2989 content.push(b);
2990 }
2991 content.extend_from_slice(b" EI Q");
2992
2993 let result = ContentParser::parse_content(&content);
2994 assert!(
2995 result.is_ok(),
2996 "Parsing inline image with all byte values failed: {:?}",
2997 result.err()
2998 );
2999 }
3000
3001 #[test]
3002 fn test_inline_image_ei_detection() {
3003 let content = b"BI /W 2 /H 1 /BPC 8 /CS /G ID \x45\x49\x00\n EI Q";
3006 let result = ContentParser::parse_content(content);
3008 assert!(result.is_ok(), "EI detection failed: {:?}", result.err());
3009
3010 let ops = result.unwrap();
3011 let has_inline = ops
3012 .iter()
3013 .any(|op| matches!(op, ContentOperation::InlineImage { .. }));
3014 assert!(has_inline, "Expected InlineImage operation");
3015 }
3016
3017 #[test]
3018 fn test_tokenizer_no_infinite_loop_on_consecutive_delimiters() {
3019 let input = b"q {{{}}})))) Q";
3021 let mut tokenizer = ContentTokenizer::new(input);
3022
3023 let mut tokens = Vec::new();
3024 while let Some(token) = tokenizer.next_token().unwrap() {
3025 tokens.push(token);
3026 if tokens.len() > 100 {
3027 panic!("Tokenizer produced too many tokens — possible infinite loop");
3028 }
3029 }
3030
3031 assert!(tokens.contains(&Token::Operator("q".to_string())));
3032 assert!(tokens.contains(&Token::Operator("Q".to_string())));
3033 }
3034
3035 #[test]
3036 fn test_content_parser_inline_image_produces_correct_operation() {
3037 let content = b"BI /W 4 /H 4 /BPC 8 /CS /G ID \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F EI";
3039 let result = ContentParser::parse_content(content);
3040 assert!(result.is_ok(), "Parse failed: {:?}", result.err());
3041
3042 let ops = result.unwrap();
3043 assert_eq!(
3044 ops.len(),
3045 1,
3046 "Expected exactly 1 operation, got {}",
3047 ops.len()
3048 );
3049
3050 if let ContentOperation::InlineImage { params, data } = &ops[0] {
3051 assert_eq!(params.get("Width"), Some(&Object::Integer(4)));
3052 assert_eq!(params.get("Height"), Some(&Object::Integer(4)));
3053 assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(8)));
3054 assert!(!data.is_empty(), "Image data should not be empty");
3055 } else {
3056 panic!("Expected InlineImage operation, got {:?}", ops[0]);
3057 }
3058 }
3059
3060 #[test]
3061 fn test_octal_escape_overflow_777() {
3062 let mut tokenizer = ContentTokenizer::new(b"(\\777)");
3066 let token = tokenizer.next_token().unwrap().unwrap();
3067 match token {
3068 Token::String(data) => assert_eq!(data, vec![0xFF]),
3069 _ => panic!("Expected string token"),
3070 }
3071 }
3072
3073 #[test]
3074 fn test_octal_escape_overflow_400() {
3075 let mut tokenizer = ContentTokenizer::new(b"(\\400)");
3078 let token = tokenizer.next_token().unwrap().unwrap();
3079 match token {
3080 Token::String(data) => assert_eq!(data, vec![0x00]),
3081 _ => panic!("Expected string token"),
3082 }
3083 }
3084
3085 #[test]
3086 fn test_octal_escape_overflow_577() {
3087 let mut tokenizer = ContentTokenizer::new(b"(\\577)");
3090 let token = tokenizer.next_token().unwrap().unwrap();
3091 match token {
3092 Token::String(data) => assert_eq!(data, vec![0x7F]),
3093 _ => panic!("Expected string token"),
3094 }
3095 }
3096
3097 #[test]
3098 fn test_octal_escape_max_valid_377() {
3099 let mut tokenizer = ContentTokenizer::new(b"(\\377)");
3101 let token = tokenizer.next_token().unwrap().unwrap();
3102 match token {
3103 Token::String(data) => assert_eq!(data, vec![0xFF]),
3104 _ => panic!("Expected string token"),
3105 }
3106 }
3107
3108 #[test]
3109 fn test_octal_escape_overflow_mixed_with_valid() {
3110 let mut tokenizer = ContentTokenizer::new(b"(A\\777B\\101C)");
3112 let token = tokenizer.next_token().unwrap().unwrap();
3113 match token {
3114 Token::String(data) => {
3115 assert_eq!(data, vec![b'A', 0xFF, b'B', b'A', b'C']);
3116 }
3117 _ => panic!("Expected string token"),
3118 }
3119 }
3120}