1use super::{ParseError, ParseResult};
57use crate::objects::Object;
58use std::collections::HashMap;
59
/// A value stored in a marked-content property list.
///
/// Values are built from tokenizer tokens when the inline property
/// dictionary of a `BDC` or `DP` operator is converted.
#[derive(Debug, Clone, PartialEq)]
pub enum MarkedContentValue {
    /// Raw bytes of a literal or hexadecimal string.
    String(Vec<u8>),
    /// Integer number (widened from the tokenizer's `i32`).
    Integer(i64),
    /// Real number (widened from the tokenizer's `f32`).
    Real(f64),
    /// Name, stored without its leading `/`.
    Name(String),
    /// Array elements in the order they appeared in the stream.
    Array(Vec<MarkedContentValue>),
    /// Nested dictionary keyed by name.
    Dict(HashMap<String, MarkedContentValue>),
}
90
/// Property operand of the `BDC` and `DP` marked-content operators.
#[derive(Debug, Clone, PartialEq)]
pub enum MarkedContentProps {
    /// Properties given directly as an inline `<< ... >>` dictionary.
    Inline(HashMap<String, MarkedContentValue>),
    /// Properties given as a name operand.
    // NOTE(review): presumably a key into the page's /Properties resource
    // dictionary -- the lookup is not performed in this file.
    ResourceRef(String),
}
105
/// One parsed content-stream operation: an operator together with its operands.
///
/// The operator named on each variant is the one `ContentParser` maps to it.
#[derive(Debug, Clone, PartialEq)]
pub enum ContentOperation {
    /// `BT` operator.
    BeginText,

    /// `ET` operator.
    EndText,

    /// `Tc` operator: character spacing.
    SetCharSpacing(f32),

    /// `Tw` operator: word spacing.
    SetWordSpacing(f32),

    /// `Tz` operator: horizontal scaling.
    SetHorizontalScaling(f32),

    /// `TL` operator: leading.
    SetLeading(f32),

    /// `Tf` operator: font name and size.
    SetFont(String, f32),

    /// `Tr` operator: text render mode.
    SetTextRenderMode(i32),

    /// `Ts` operator: text rise.
    SetTextRise(f32),

    /// `Td` operator: `(tx, ty)`.
    MoveText(f32, f32),

    /// `TD` operator: `(tx, ty)`.
    MoveTextSetLeading(f32, f32),

    /// `Tm` operator: `(a, b, c, d, e, f)`.
    SetTextMatrix(f32, f32, f32, f32, f32, f32),

    /// `T*` operator.
    NextLine,

    /// `Tj` operator: string bytes to show.
    ShowText(Vec<u8>),

    /// `TJ` operator: strings interleaved with numeric adjustments.
    ShowTextArray(Vec<TextElement>),

    /// `'` operator: string bytes to show.
    NextLineShowText(Vec<u8>),

    /// `"` operator: `(aw, ac, text)`.
    SetSpacingNextLineShowText(f32, f32, Vec<u8>),

    /// `q` operator.
    SaveGraphicsState,

    /// `Q` operator.
    RestoreGraphicsState,

    /// `cm` operator: `(a, b, c, d, e, f)`.
    SetTransformMatrix(f32, f32, f32, f32, f32, f32),

    /// `w` operator: line width.
    SetLineWidth(f32),

    /// `J` operator: line cap style.
    SetLineCap(i32),

    /// `j` operator: line join style.
    SetLineJoin(i32),

    /// `M` operator: miter limit.
    SetMiterLimit(f32),

    /// `d` operator: dash array and phase.
    SetDashPattern(Vec<f32>, f32),

    /// `ri` operator: rendering intent name.
    SetIntent(String),

    /// `i` operator: flatness.
    SetFlatness(f32),

    /// `gs` operator: name operand.
    SetGraphicsStateParams(String),

    /// `m` operator: `(x, y)`.
    MoveTo(f32, f32),

    /// `l` operator: `(x, y)`.
    LineTo(f32, f32),

    /// `c` operator: `(x1, y1, x2, y2, x3, y3)`.
    CurveTo(f32, f32, f32, f32, f32, f32),

    /// `v` operator: `(x2, y2, x3, y3)`.
    CurveToV(f32, f32, f32, f32),

    /// `y` operator: `(x1, y1, x3, y3)`.
    CurveToY(f32, f32, f32, f32),

    /// `h` operator.
    ClosePath,

    /// `re` operator: `(x, y, width, height)`.
    Rectangle(f32, f32, f32, f32),

    /// `S` operator.
    Stroke,

    /// `s` operator.
    CloseStroke,

    /// `f` or `F` operator.
    Fill,

    /// `f*` operator.
    FillEvenOdd,

    /// `B` operator.
    FillStroke,

    /// `B*` operator.
    FillStrokeEvenOdd,

    /// `b` operator.
    CloseFillStroke,

    /// `b*` operator.
    CloseFillStrokeEvenOdd,

    /// `n` operator.
    EndPath,

    /// `W` operator.
    Clip,
    /// `W*` operator.
    ClipEvenOdd,
    /// `CS` operator: color space name.
    SetStrokingColorSpace(String),

    /// `cs` operator: color space name.
    SetNonStrokingColorSpace(String),

    /// `SC` or `SCN` operator: numeric components only.
    SetStrokingColor(Vec<f32>),

    /// `sc` or `scn` operator: numeric components only.
    SetNonStrokingColor(Vec<f32>),

    /// `G` operator: gray level.
    SetStrokingGray(f32),

    /// `g` operator: gray level.
    SetNonStrokingGray(f32),

    /// `RG` operator: `(r, g, b)`.
    SetStrokingRGB(f32, f32, f32),

    /// `rg` operator: `(r, g, b)`.
    SetNonStrokingRGB(f32, f32, f32),

    /// `K` operator: `(c, m, y, k)`.
    SetStrokingCMYK(f32, f32, f32, f32),

    /// `k` operator: `(c, m, y, k)`.
    SetNonStrokingCMYK(f32, f32, f32, f32),

    /// `sh` operator: shading name.
    ShadingFill(String),
    /// Start of an inline image.
    // NOTE(review): the parser in this file folds `BI ... ID ... EI` into
    // `InlineImage` and never constructs this variant -- confirm whether
    // other code still produces it.
    BeginInlineImage,
    /// A complete inline image (`BI ... ID ... EI`).
    InlineImage {
        /// Image dictionary entries, with abbreviated keys and names expanded.
        params: HashMap<String, Object>,
        /// Raw image bytes found between `ID` and `EI`.
        data: Vec<u8>,
    },

    /// `Do` operator: XObject name.
    PaintXObject(String),

    /// `BMC` operator: tag name.
    BeginMarkedContent(String),
    /// `BDC` operator: tag name and properties.
    BeginMarkedContentWithProps(String, MarkedContentProps),
    /// `EMC` operator.
    EndMarkedContent,
    /// `MP` operator: tag name.
    DefineMarkedContentPoint(String),
    /// `DP` operator: tag name and properties.
    DefineMarkedContentPointWithProps(String, MarkedContentProps),
    /// `BX` operator.
    BeginCompatibility,
    /// `EX` operator.
    EndCompatibility,
}
385
/// One element of the array operand of the `TJ` operator.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// Bytes of a literal or hexadecimal string element.
    Text(Vec<u8>),
    /// Numeric element (integers are converted to `f32`).
    Spacing(f32),
}
412
/// Lexical token produced by `ContentTokenizer`.
#[derive(Debug, Clone, PartialEq)]
pub(super) enum Token {
    /// Number written with a decimal point.
    Number(f32),
    /// Number written without a decimal point.
    Integer(i32),
    /// Decoded bytes of a literal `( ... )` string.
    String(Vec<u8>),
    /// Decoded bytes of a hexadecimal `< ... >` string.
    HexString(Vec<u8>),
    /// Name with the leading `/` removed and `#xx` escapes decoded.
    Name(String),
    /// Any other run of regular characters (operators and keywords).
    Operator(String),
    /// `[`
    ArrayStart,
    /// `]`
    ArrayEnd,
    /// `<<`
    DictStart,
    /// `>>`
    DictEnd,
    /// Raw bytes read after an `ID` operator, up to the closing `EI`.
    InlineImageData(Vec<u8>),
}
431
/// Splits the raw bytes of a content stream into `Token`s.
pub struct ContentTokenizer<'a> {
    /// The bytes being tokenized.
    input: &'a [u8],
    /// Index of the next unread byte in `input`.
    position: usize,
    /// Set after an `ID` operator has been returned; the following call to
    /// `next_token` then reads raw inline-image bytes instead of tokens.
    in_inline_image: bool,
}
440
441impl<'a> ContentTokenizer<'a> {
442 pub fn new(input: &'a [u8]) -> Self {
444 Self {
445 input,
446 position: 0,
447 in_inline_image: false,
448 }
449 }
450
451 pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
453 if self.in_inline_image {
455 self.in_inline_image = false;
456 return self.read_inline_image_data();
457 }
458
459 self.skip_whitespace();
460
461 if self.position >= self.input.len() {
462 return Ok(None);
463 }
464
465 let ch = self.input[self.position];
466
467 match ch {
468 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
470
471 b'(' => self.read_literal_string(),
473 b'<' => {
474 if self.peek_next() == Some(b'<') {
475 self.position += 2;
476 Ok(Some(Token::DictStart))
477 } else {
478 self.read_hex_string()
479 }
480 }
481 b'>' => {
482 if self.peek_next() == Some(b'>') {
483 self.position += 2;
484 Ok(Some(Token::DictEnd))
485 } else {
486 Err(ParseError::SyntaxError {
487 position: self.position,
488 message: "Unexpected '>'".to_string(),
489 })
490 }
491 }
492
493 b'[' => {
495 self.position += 1;
496 Ok(Some(Token::ArrayStart))
497 }
498 b']' => {
499 self.position += 1;
500 Ok(Some(Token::ArrayEnd))
501 }
502
503 b'/' => self.read_name(),
505
506 b';' | b')' | b'{' | b'}' => {
511 self.position += 1;
512 self.next_token() }
514
515 _ => {
517 let token = self.read_operator()?;
518 if let Some(Token::Operator(ref op)) = token {
520 if op == "ID" {
521 self.in_inline_image = true;
522 }
523 }
524 Ok(token)
525 }
526 }
527 }
528
529 fn skip_whitespace(&mut self) {
530 while self.position < self.input.len() {
531 match self.input[self.position] {
532 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
533 b'%' => self.skip_comment(),
534 _ => break,
535 }
536 }
537 }
538
539 fn skip_comment(&mut self) {
540 while self.position < self.input.len() && self.input[self.position] != b'\n' {
541 self.position += 1;
542 }
543 }
544
545 fn peek_next(&self) -> Option<u8> {
546 if self.position + 1 < self.input.len() {
547 Some(self.input[self.position + 1])
548 } else {
549 None
550 }
551 }
552
553 fn read_number(&mut self) -> ParseResult<Option<Token>> {
554 let start = self.position;
555 let mut has_dot = false;
556
557 if self.position < self.input.len()
559 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
560 {
561 self.position += 1;
562 }
563
564 while self.position < self.input.len() {
566 match self.input[self.position] {
567 b'0'..=b'9' => self.position += 1,
568 b'.' if !has_dot => {
569 has_dot = true;
570 self.position += 1;
571 }
572 _ => break,
573 }
574 }
575
576 let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
577 ParseError::SyntaxError {
578 position: start,
579 message: "Invalid number format".to_string(),
580 }
581 })?;
582
583 if has_dot {
584 let value = num_str
585 .parse::<f32>()
586 .map_err(|_| ParseError::SyntaxError {
587 position: start,
588 message: "Invalid float number".to_string(),
589 })?;
590 Ok(Some(Token::Number(value)))
591 } else {
592 let value = num_str
593 .parse::<i32>()
594 .map_err(|_| ParseError::SyntaxError {
595 position: start,
596 message: "Invalid integer number".to_string(),
597 })?;
598 Ok(Some(Token::Integer(value)))
599 }
600 }
601
602 fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
603 self.position += 1; let mut result = Vec::new();
605 let mut paren_depth = 1;
606 let mut escape = false;
607
608 while self.position < self.input.len() && paren_depth > 0 {
609 let ch = self.input[self.position];
610 self.position += 1;
611
612 if escape {
613 match ch {
614 b'n' => result.push(b'\n'),
615 b'r' => result.push(b'\r'),
616 b't' => result.push(b'\t'),
617 b'b' => result.push(b'\x08'),
618 b'f' => result.push(b'\x0C'),
619 b'(' => result.push(b'('),
620 b')' => result.push(b')'),
621 b'\\' => result.push(b'\\'),
622 b'0'..=b'7' => {
623 self.position -= 1;
625 let octal_value = self.read_octal_escape()?;
626 result.push(octal_value);
627 }
628 _ => result.push(ch), }
630 escape = false;
631 } else {
632 match ch {
633 b'\\' => escape = true,
634 b'(' => {
635 paren_depth += 1;
636 result.push(ch);
637 }
638 b')' => {
639 paren_depth -= 1;
640 if paren_depth > 0 {
641 result.push(ch);
642 }
643 }
644 _ => result.push(ch),
645 }
646 }
647 }
648
649 Ok(Some(Token::String(result)))
650 }
651
652 fn read_octal_escape(&mut self) -> ParseResult<u8> {
653 let mut value = 0u16;
656 let mut count = 0;
657
658 while count < 3 && self.position < self.input.len() {
659 match self.input[self.position] {
660 b'0'..=b'7' => {
661 value = value * 8 + u16::from(self.input[self.position] - b'0');
662 self.position += 1;
663 count += 1;
664 }
665 _ => break,
666 }
667 }
668
669 Ok(value as u8)
670 }
671
672 fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
673 self.position += 1; let mut result = Vec::new();
675 let mut nibble = None;
676
677 while self.position < self.input.len() {
678 let ch = self.input[self.position];
679
680 match ch {
681 b'>' => {
682 self.position += 1;
683 if let Some(n) = nibble {
685 result.push(n << 4);
686 }
687 return Ok(Some(Token::HexString(result)));
688 }
689 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
690 let digit = if ch <= b'9' {
691 ch - b'0'
692 } else if ch <= b'F' {
693 ch - b'A' + 10
694 } else {
695 ch - b'a' + 10
696 };
697
698 if let Some(n) = nibble {
699 result.push((n << 4) | digit);
700 nibble = None;
701 } else {
702 nibble = Some(digit);
703 }
704 self.position += 1;
705 }
706 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
707 self.position += 1;
709 }
710 _ => {
711 return Err(ParseError::SyntaxError {
712 position: self.position,
713 message: format!("Invalid character in hex string: {:?}", ch as char),
714 });
715 }
716 }
717 }
718
719 Err(ParseError::SyntaxError {
720 position: self.position,
721 message: "Unterminated hex string".to_string(),
722 })
723 }
724
725 fn read_name(&mut self) -> ParseResult<Option<Token>> {
726 self.position += 1; let start = self.position;
728
729 while self.position < self.input.len() {
730 let ch = self.input[self.position];
731 match ch {
732 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
733 | b']' | b'{' | b'}' | b'/' | b'%' => break,
734 b'#' => {
735 self.position += 1;
737 if self.position + 1 < self.input.len() {
738 self.position += 2;
739 }
740 }
741 _ => self.position += 1,
742 }
743 }
744
745 let name_bytes = &self.input[start..self.position];
746 let name = self.decode_name(name_bytes)?;
747 Ok(Some(Token::Name(name)))
748 }
749
750 fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
751 let mut result = Vec::new();
752 let mut i = 0;
753
754 while i < bytes.len() {
755 if bytes[i] == b'#' && i + 2 < bytes.len() {
756 let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
758 ParseError::SyntaxError {
759 position: self.position,
760 message: "Invalid hex escape in name".to_string(),
761 }
762 })?;
763 let value =
764 u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
765 position: self.position,
766 message: "Invalid hex escape in name".to_string(),
767 })?;
768 result.push(value);
769 i += 3;
770 } else {
771 result.push(bytes[i]);
772 i += 1;
773 }
774 }
775
776 String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
777 position: self.position,
778 message: "Invalid UTF-8 in name".to_string(),
779 })
780 }
781
782 fn read_operator(&mut self) -> ParseResult<Option<Token>> {
783 let start = self.position;
784
785 while self.position < self.input.len() {
786 let ch = self.input[self.position];
787 match ch {
788 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
789 | b']' | b'{' | b'}' | b'/' | b'%' | b';' => break,
790 _ => self.position += 1,
791 }
792 }
793
794 let op_bytes = &self.input[start..self.position];
795 let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
796 position: start,
797 message: "Invalid operator".to_string(),
798 })?;
799
800 Ok(Some(Token::Operator(op.to_string())))
801 }
802
    /// Reads the raw bytes of an inline image, from just after the `ID`
    /// operator up to the closing `EI`, and returns them as
    /// `Token::InlineImageData`.
    ///
    /// The `EI` marker itself is consumed and is not returned as a token.
    /// If no `EI` is found, everything up to the end of input is returned.
    /// This function never returns an error.
    fn read_inline_image_data(&mut self) -> ParseResult<Option<Token>> {
        // Skip the single white-space separator after `ID` (a CRLF pair
        // counts as one separator).
        if self.position < self.input.len() {
            let ch = self.input[self.position];
            if ch == b' ' || ch == b'\n' || ch == b'\r' || ch == b'\t' {
                self.position += 1;
                if ch == b'\r'
                    && self.position < self.input.len()
                    && self.input[self.position] == b'\n'
                {
                    self.position += 1;
                }
            }
        }

        let start = self.position;

        // Binary data may contain the bytes "EI" by chance, so a candidate
        // only counts when it is preceded by white space (or is at the very
        // start of the data) and followed by a boundary byte or end of input.
        while self.position + 1 < self.input.len() {
            let preceded_by_whitespace = self.position == start
                || matches!(
                    self.input[self.position - 1],
                    b' ' | b'\t' | b'\r' | b'\n' | b'\x0C'
                );

            if preceded_by_whitespace
                && self.input[self.position] == b'E'
                && self.input[self.position + 1] == b'I'
            {
                let after_ei = self.position + 2;
                let followed_by_boundary = after_ei >= self.input.len()
                    || matches!(
                        self.input[after_ei],
                        b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'/' | b'<' | b'(' | b'[' | b'%'
                    );

                if followed_by_boundary {
                    let mut end = self.position;
                    // Drop the one white-space byte that separates data from `EI`.
                    if end > start
                        && matches!(self.input[end - 1], b' ' | b'\t' | b'\r' | b'\n' | b'\x0C')
                    {
                        end -= 1;
                    }
                    let data = self.input[start..end].to_vec();
                    // Resume tokenizing right after the `EI` marker.
                    self.position = after_ei;
                    return Ok(Some(Token::InlineImageData(data)));
                }
            }
            self.position += 1;
        }

        // No terminator found: hand back the remainder of the input.
        let data = self.input[start..].to_vec();
        self.position = self.input.len();
        Ok(Some(Token::InlineImageData(data)))
    }
866}
867
/// Turns the token sequence of a content stream into `ContentOperation`s.
pub struct ContentParser {
    /// All tokens of the stream, in order.
    tokens: Vec<Token>,
    /// Index of the next token to be consumed.
    position: usize,
}
890
891impl ContentParser {
892 pub fn new(_content: &[u8]) -> Self {
894 Self {
895 tokens: Vec::new(),
896 position: 0,
897 }
898 }
899
900 pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
934 Self::parse_content(content)
935 }
936
937 pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
942 let mut tokenizer = ContentTokenizer::new(content);
943 let mut tokens = Vec::new();
944
945 loop {
952 match tokenizer.next_token() {
953 Ok(Some(token)) => tokens.push(token),
954 Ok(None) => break,
955 Err(_e) => {
956 tracing::debug!("content tokenizer stopped early: {_e}");
957 break;
958 }
959 }
960 }
961
962 let mut parser = Self {
963 tokens,
964 position: 0,
965 };
966
967 parser.parse_operators()
968 }
969
970 fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
971 let mut operators = Vec::new();
972 let mut operand_stack: Vec<Token> = Vec::new();
973
974 while self.position < self.tokens.len() {
975 let token = self.tokens[self.position].clone();
976 self.position += 1;
977
978 match &token {
979 Token::Operator(op) => {
980 match self.parse_operator(op, &mut operand_stack) {
990 Ok(operator) => operators.push(operator),
991 Err(_e) => {
992 tracing::debug!("skipping malformed content operator '{op}': {_e}");
993 operand_stack.clear();
994 }
995 }
996 }
997 _ => {
998 operand_stack.push(token);
1000 }
1001 }
1002 }
1003
1004 Ok(operators)
1005 }
1006
1007 fn parse_operator(
1008 &mut self,
1009 op: &str,
1010 operands: &mut Vec<Token>,
1011 ) -> ParseResult<ContentOperation> {
1012 let operator = match op {
1013 "BT" => ContentOperation::BeginText,
1015 "ET" => ContentOperation::EndText,
1016
1017 "Tc" => {
1019 let spacing = self.pop_number(operands)?;
1020 ContentOperation::SetCharSpacing(spacing)
1021 }
1022 "Tw" => {
1023 let spacing = self.pop_number(operands)?;
1024 ContentOperation::SetWordSpacing(spacing)
1025 }
1026 "Tz" => {
1027 let scale = self.pop_number(operands)?;
1028 ContentOperation::SetHorizontalScaling(scale)
1029 }
1030 "TL" => {
1031 let leading = self.pop_number(operands)?;
1032 ContentOperation::SetLeading(leading)
1033 }
1034 "Tf" => {
1035 let size = self.pop_number(operands)?;
1036 let font = self.pop_name(operands)?;
1037 ContentOperation::SetFont(font, size)
1038 }
1039 "Tr" => {
1040 let mode = self.pop_integer(operands)?;
1041 ContentOperation::SetTextRenderMode(mode)
1042 }
1043 "Ts" => {
1044 let rise = self.pop_number(operands)?;
1045 ContentOperation::SetTextRise(rise)
1046 }
1047
1048 "Td" => {
1050 let ty = self.pop_number(operands)?;
1051 let tx = self.pop_number(operands)?;
1052 ContentOperation::MoveText(tx, ty)
1053 }
1054 "TD" => {
1055 let ty = self.pop_number(operands)?;
1056 let tx = self.pop_number(operands)?;
1057 ContentOperation::MoveTextSetLeading(tx, ty)
1058 }
1059 "Tm" => {
1060 let f = self.pop_number(operands)?;
1061 let e = self.pop_number(operands)?;
1062 let d = self.pop_number(operands)?;
1063 let c = self.pop_number(operands)?;
1064 let b = self.pop_number(operands)?;
1065 let a = self.pop_number(operands)?;
1066 ContentOperation::SetTextMatrix(a, b, c, d, e, f)
1067 }
1068 "T*" => ContentOperation::NextLine,
1069
1070 "Tj" => {
1072 let text = self.pop_string(operands)?;
1073 ContentOperation::ShowText(text)
1074 }
1075 "TJ" => {
1076 let array = self.pop_array(operands)?;
1077 let elements = self.parse_text_array(array)?;
1078 ContentOperation::ShowTextArray(elements)
1079 }
1080 "'" => {
1081 let text = self.pop_string(operands)?;
1082 ContentOperation::NextLineShowText(text)
1083 }
1084 "\"" => {
1085 let text = self.pop_string(operands)?;
1091 let ac = self.pop_number(operands)?;
1092 let aw = self.pop_number(operands)?;
1093 ContentOperation::SetSpacingNextLineShowText(aw, ac, text)
1094 }
1095
1096 "q" => ContentOperation::SaveGraphicsState,
1098 "Q" => ContentOperation::RestoreGraphicsState,
1099 "cm" => {
1100 let f = self.pop_number(operands)?;
1101 let e = self.pop_number(operands)?;
1102 let d = self.pop_number(operands)?;
1103 let c = self.pop_number(operands)?;
1104 let b = self.pop_number(operands)?;
1105 let a = self.pop_number(operands)?;
1106 ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
1107 }
1108 "w" => {
1109 let width = self.pop_number(operands)?;
1110 ContentOperation::SetLineWidth(width)
1111 }
1112 "J" => {
1113 let cap = self.pop_integer(operands)?;
1114 ContentOperation::SetLineCap(cap)
1115 }
1116 "j" => {
1117 let join = self.pop_integer(operands)?;
1118 ContentOperation::SetLineJoin(join)
1119 }
1120 "M" => {
1121 let limit = self.pop_number(operands)?;
1122 ContentOperation::SetMiterLimit(limit)
1123 }
1124 "d" => {
1125 let phase = self.pop_number(operands)?;
1126 let array = self.pop_array(operands)?;
1127 let pattern = self.parse_dash_array(array)?;
1128 ContentOperation::SetDashPattern(pattern, phase)
1129 }
1130 "ri" => {
1131 let intent = self.pop_name(operands)?;
1132 ContentOperation::SetIntent(intent)
1133 }
1134 "i" => {
1135 let flatness = self.pop_number(operands)?;
1136 ContentOperation::SetFlatness(flatness)
1137 }
1138 "gs" => {
1139 let name = self.pop_name(operands)?;
1140 ContentOperation::SetGraphicsStateParams(name)
1141 }
1142
1143 "m" => {
1145 let y = self.pop_number(operands)?;
1146 let x = self.pop_number(operands)?;
1147 ContentOperation::MoveTo(x, y)
1148 }
1149 "l" => {
1150 let y = self.pop_number(operands)?;
1151 let x = self.pop_number(operands)?;
1152 ContentOperation::LineTo(x, y)
1153 }
1154 "c" => {
1155 let y3 = self.pop_number(operands)?;
1156 let x3 = self.pop_number(operands)?;
1157 let y2 = self.pop_number(operands)?;
1158 let x2 = self.pop_number(operands)?;
1159 let y1 = self.pop_number(operands)?;
1160 let x1 = self.pop_number(operands)?;
1161 ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
1162 }
1163 "v" => {
1164 let y3 = self.pop_number(operands)?;
1165 let x3 = self.pop_number(operands)?;
1166 let y2 = self.pop_number(operands)?;
1167 let x2 = self.pop_number(operands)?;
1168 ContentOperation::CurveToV(x2, y2, x3, y3)
1169 }
1170 "y" => {
1171 let y3 = self.pop_number(operands)?;
1172 let x3 = self.pop_number(operands)?;
1173 let y1 = self.pop_number(operands)?;
1174 let x1 = self.pop_number(operands)?;
1175 ContentOperation::CurveToY(x1, y1, x3, y3)
1176 }
1177 "h" => ContentOperation::ClosePath,
1178 "re" => {
1179 let height = self.pop_number(operands)?;
1180 let width = self.pop_number(operands)?;
1181 let y = self.pop_number(operands)?;
1182 let x = self.pop_number(operands)?;
1183 ContentOperation::Rectangle(x, y, width, height)
1184 }
1185
1186 "S" => ContentOperation::Stroke,
1188 "s" => ContentOperation::CloseStroke,
1189 "f" | "F" => ContentOperation::Fill,
1190 "f*" => ContentOperation::FillEvenOdd,
1191 "B" => ContentOperation::FillStroke,
1192 "B*" => ContentOperation::FillStrokeEvenOdd,
1193 "b" => ContentOperation::CloseFillStroke,
1194 "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1195 "n" => ContentOperation::EndPath,
1196
1197 "W" => ContentOperation::Clip,
1199 "W*" => ContentOperation::ClipEvenOdd,
1200
1201 "CS" => {
1203 let name = self.pop_name(operands)?;
1204 ContentOperation::SetStrokingColorSpace(name)
1205 }
1206 "cs" => {
1207 let name = self.pop_name(operands)?;
1208 ContentOperation::SetNonStrokingColorSpace(name)
1209 }
1210 "SC" | "SCN" => {
1211 let components = self.pop_color_components(operands)?;
1212 ContentOperation::SetStrokingColor(components)
1213 }
1214 "sc" | "scn" => {
1215 let components = self.pop_color_components(operands)?;
1216 ContentOperation::SetNonStrokingColor(components)
1217 }
1218 "G" => {
1219 let gray = self.pop_number(operands)?;
1220 ContentOperation::SetStrokingGray(gray)
1221 }
1222 "g" => {
1223 let gray = self.pop_number(operands)?;
1224 ContentOperation::SetNonStrokingGray(gray)
1225 }
1226 "RG" => {
1227 let b = self.pop_number(operands)?;
1228 let g = self.pop_number(operands)?;
1229 let r = self.pop_number(operands)?;
1230 ContentOperation::SetStrokingRGB(r, g, b)
1231 }
1232 "rg" => {
1233 let b = self.pop_number(operands)?;
1234 let g = self.pop_number(operands)?;
1235 let r = self.pop_number(operands)?;
1236 ContentOperation::SetNonStrokingRGB(r, g, b)
1237 }
1238 "K" => {
1239 let k = self.pop_number(operands)?;
1240 let y = self.pop_number(operands)?;
1241 let m = self.pop_number(operands)?;
1242 let c = self.pop_number(operands)?;
1243 ContentOperation::SetStrokingCMYK(c, m, y, k)
1244 }
1245 "k" => {
1246 let k = self.pop_number(operands)?;
1247 let y = self.pop_number(operands)?;
1248 let m = self.pop_number(operands)?;
1249 let c = self.pop_number(operands)?;
1250 ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1251 }
1252
1253 "sh" => {
1255 let name = self.pop_name(operands)?;
1256 ContentOperation::ShadingFill(name)
1257 }
1258
1259 "Do" => {
1261 let name = self.pop_name(operands)?;
1262 ContentOperation::PaintXObject(name)
1263 }
1264
1265 "BMC" => {
1267 let tag = self.pop_name(operands)?;
1268 ContentOperation::BeginMarkedContent(tag)
1269 }
1270 "BDC" => {
1271 let props = self.pop_dict_or_name(operands)?;
1272 let tag = self.pop_name(operands)?;
1273 ContentOperation::BeginMarkedContentWithProps(tag, props)
1274 }
1275 "EMC" => ContentOperation::EndMarkedContent,
1276 "MP" => {
1277 let tag = self.pop_name(operands)?;
1278 ContentOperation::DefineMarkedContentPoint(tag)
1279 }
1280 "DP" => {
1281 let props = self.pop_dict_or_name(operands)?;
1282 let tag = self.pop_name(operands)?;
1283 ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1284 }
1285
1286 "BX" => ContentOperation::BeginCompatibility,
1288 "EX" => ContentOperation::EndCompatibility,
1289
1290 "BI" => {
1292 operands.clear(); self.parse_inline_image()?
1294 }
1295
1296 _ => {
1297 return Err(ParseError::SyntaxError {
1298 position: self.position,
1299 message: format!("Unknown operator: {op}"),
1300 });
1301 }
1302 };
1303
1304 operands.clear(); Ok(operator)
1306 }
1307
1308 fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1310 match operands.pop() {
1311 Some(Token::Number(n)) => Ok(n),
1312 Some(Token::Integer(i)) => Ok(i as f32),
1313 _ => Err(ParseError::SyntaxError {
1314 position: self.position,
1315 message: "Expected number operand".to_string(),
1316 }),
1317 }
1318 }
1319
1320 fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1321 match operands.pop() {
1322 Some(Token::Integer(i)) => Ok(i),
1323 _ => Err(ParseError::SyntaxError {
1324 position: self.position,
1325 message: "Expected integer operand".to_string(),
1326 }),
1327 }
1328 }
1329
1330 fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1331 match operands.pop() {
1332 Some(Token::Name(n)) => Ok(n),
1333 _ => Err(ParseError::SyntaxError {
1334 position: self.position,
1335 message: "Expected name operand".to_string(),
1336 }),
1337 }
1338 }
1339
1340 fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1341 match operands.pop() {
1342 Some(Token::String(s)) => Ok(s),
1343 Some(Token::HexString(s)) => Ok(s),
1344 _ => Err(ParseError::SyntaxError {
1345 position: self.position,
1346 message: "Expected string operand".to_string(),
1347 }),
1348 }
1349 }
1350
1351 fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1352 let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
1354 if has_array_end {
1355 operands.pop(); }
1357
1358 let mut array = Vec::new();
1359 let mut found_start = false;
1360
1361 while let Some(token) = operands.pop() {
1363 match token {
1364 Token::ArrayStart => {
1365 found_start = true;
1366 break;
1367 }
1368 Token::ArrayEnd => {
1369 continue;
1371 }
1372 _ => array.push(token),
1373 }
1374 }
1375
1376 if !found_start {
1377 return Err(ParseError::SyntaxError {
1378 position: self.position,
1379 message: "Expected array".to_string(),
1380 });
1381 }
1382
1383 array.reverse(); Ok(array)
1385 }
1386
1387 fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<MarkedContentProps> {
1388 let token = operands.pop().ok_or_else(|| ParseError::SyntaxError {
1389 position: self.position,
1390 message: "Expected dict or name operand for BDC/DP".to_string(),
1391 })?;
1392
1393 match token {
1394 Token::Name(name) => Ok(MarkedContentProps::ResourceRef(name)),
1395 Token::DictEnd => {
1396 let mut map: HashMap<String, MarkedContentValue> = HashMap::new();
1400 loop {
1401 let next = operands.pop().ok_or_else(|| ParseError::SyntaxError {
1402 position: self.position,
1403 message: "Unterminated inline dict in BDC/DP".to_string(),
1404 })?;
1405 if matches!(next, Token::DictStart) {
1406 break;
1407 }
1408 let value = Self::token_to_mc_value(next, operands)?;
1409 let key = match operands.pop() {
1410 Some(Token::Name(k)) => k,
1411 Some(other) => {
1412 return Err(ParseError::SyntaxError {
1413 position: self.position,
1414 message: format!(
1415 "Expected Name as inline dict key, got {:?}",
1416 other
1417 ),
1418 });
1419 }
1420 None => {
1421 return Err(ParseError::SyntaxError {
1422 position: self.position,
1423 message: "Unterminated inline dict (missing key)".to_string(),
1424 });
1425 }
1426 };
1427 map.insert(key, value);
1428 }
1429 Ok(MarkedContentProps::Inline(map))
1430 }
1431 other => Err(ParseError::SyntaxError {
1432 position: self.position,
1433 message: format!("Expected name or inline dict for BDC/DP, got {:?}", other),
1434 }),
1435 }
1436 }
1437
1438 fn token_to_mc_value(
1442 token: Token,
1443 operands: &mut Vec<Token>,
1444 ) -> ParseResult<MarkedContentValue> {
1445 match token {
1446 Token::String(b) | Token::HexString(b) => Ok(MarkedContentValue::String(b)),
1447 Token::Integer(i) => Ok(MarkedContentValue::Integer(i as i64)),
1448 Token::Number(f) => Ok(MarkedContentValue::Real(f as f64)),
1449 Token::Name(n) => Ok(MarkedContentValue::Name(n)),
1450 Token::ArrayEnd => {
1451 let mut items: Vec<MarkedContentValue> = Vec::new();
1452 loop {
1453 let next = operands.pop().ok_or_else(|| ParseError::SyntaxError {
1454 position: 0,
1455 message: "Unterminated array in marked-content props".to_string(),
1456 })?;
1457 if matches!(next, Token::ArrayStart) {
1458 break;
1459 }
1460 items.push(Self::token_to_mc_value(next, operands)?);
1461 }
1462 items.reverse();
1463 Ok(MarkedContentValue::Array(items))
1464 }
1465 Token::DictEnd => {
1466 let mut nested: HashMap<String, MarkedContentValue> = HashMap::new();
1467 loop {
1468 let next = operands.pop().ok_or_else(|| ParseError::SyntaxError {
1469 position: 0,
1470 message: "Unterminated nested dict in marked-content props".to_string(),
1471 })?;
1472 if matches!(next, Token::DictStart) {
1473 break;
1474 }
1475 let value = Self::token_to_mc_value(next, operands)?;
1476 let key = match operands.pop() {
1477 Some(Token::Name(k)) => k,
1478 _ => {
1479 return Err(ParseError::SyntaxError {
1480 position: 0,
1481 message: "Expected name key in nested dict".to_string(),
1482 });
1483 }
1484 };
1485 nested.insert(key, value);
1486 }
1487 Ok(MarkedContentValue::Dict(nested))
1488 }
1489 other => Err(ParseError::SyntaxError {
1490 position: 0,
1491 message: format!("Unexpected token type in marked-content value: {:?}", other),
1492 }),
1493 }
1494 }
1495
1496 fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1497 let mut components = Vec::new();
1498
1499 while let Some(token) = operands.last() {
1501 match token {
1502 Token::Number(n) => {
1503 components.push(*n);
1504 operands.pop();
1505 }
1506 Token::Integer(i) => {
1507 components.push(*i as f32);
1508 operands.pop();
1509 }
1510 _ => break,
1511 }
1512 }
1513
1514 components.reverse();
1515 Ok(components)
1516 }
1517
1518 fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1519 let mut elements = Vec::new();
1520
1521 for token in tokens {
1522 match token {
1523 Token::String(s) | Token::HexString(s) => {
1524 elements.push(TextElement::Text(s));
1525 }
1526 Token::Number(n) => {
1527 elements.push(TextElement::Spacing(n));
1528 }
1529 Token::Integer(i) => {
1530 elements.push(TextElement::Spacing(i as f32));
1531 }
1532 _ => {
1533 return Err(ParseError::SyntaxError {
1534 position: self.position,
1535 message: "Invalid element in text array".to_string(),
1536 });
1537 }
1538 }
1539 }
1540
1541 Ok(elements)
1542 }
1543
1544 fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1545 let mut pattern = Vec::new();
1546
1547 for token in tokens {
1548 match token {
1549 Token::Number(n) => pattern.push(n),
1550 Token::Integer(i) => pattern.push(i as f32),
1551 _ => {
1552 return Err(ParseError::SyntaxError {
1553 position: self.position,
1554 message: "Invalid element in dash array".to_string(),
1555 });
1556 }
1557 }
1558 }
1559
1560 Ok(pattern)
1561 }
1562
1563 fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1564 let mut params = HashMap::new();
1566
1567 while self.position < self.tokens.len() {
1568 if let Token::Operator(op) = &self.tokens[self.position] {
1570 if op == "ID" {
1571 self.position += 1;
1572 break;
1573 }
1574 }
1575
1576 if let Token::Name(key) = &self.tokens[self.position] {
1581 self.position += 1;
1582 if self.position >= self.tokens.len() {
1583 break;
1584 }
1585
1586 let value = match &self.tokens[self.position] {
1588 Token::Integer(n) => Object::Integer(*n as i64),
1589 Token::Number(n) => Object::Real(*n as f64),
1590 Token::Name(s) => Object::Name(expand_inline_name(s)),
1591 Token::String(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1592 Token::HexString(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1593 _ => Object::Null,
1594 };
1595
1596 let full_key = expand_inline_key(key);
1598 params.insert(full_key, value);
1599 self.position += 1;
1600 } else {
1601 self.position += 1;
1602 }
1603 }
1604
1605 let data = if self.position < self.tokens.len() {
1608 if let Token::InlineImageData(bytes) = &self.tokens[self.position] {
1609 let d = bytes.clone();
1610 self.position += 1;
1611 d
1612 } else {
1613 self.collect_inline_image_data_from_tokens()?
1615 }
1616 } else {
1617 Vec::new()
1618 };
1619
1620 Ok(ContentOperation::InlineImage { params, data })
1621 }
1622
1623 fn collect_inline_image_data_from_tokens(&mut self) -> ParseResult<Vec<u8>> {
1626 let mut data = Vec::new();
1627 while self.position < self.tokens.len() {
1628 if let Token::Operator(op) = &self.tokens[self.position] {
1629 if op == "EI" {
1630 self.position += 1;
1631 break;
1632 }
1633 }
1634 match &self.tokens[self.position] {
1635 Token::String(bytes) | Token::HexString(bytes) => {
1636 data.extend_from_slice(bytes);
1637 }
1638 Token::Integer(n) => data.extend_from_slice(n.to_string().as_bytes()),
1639 Token::Number(n) => data.extend_from_slice(n.to_string().as_bytes()),
1640 Token::Name(s) | Token::Operator(s) => data.extend_from_slice(s.as_bytes()),
1641 _ => {}
1642 }
1643 self.position += 1;
1644 }
1645 Ok(data)
1646 }
1647}
1648
/// Expands an abbreviated inline-image dictionary key to its full form.
///
/// Keys without a known abbreviation are returned unchanged.
fn expand_inline_key(key: &str) -> String {
    let full = match key {
        "W" => "Width",
        "H" => "Height",
        "CS" => "ColorSpace",
        "BPC" => "BitsPerComponent",
        "F" => "Filter",
        "DP" => "DecodeParms",
        "IM" => "ImageMask",
        "I" => "Interpolate",
        "D" => "Decode",
        already_full => already_full,
    };
    full.to_owned()
}
1665
/// Expands an abbreviated inline-image name value (colour space or filter)
/// to its full name.
///
/// Names without a known abbreviation are returned unchanged.
fn expand_inline_name(name: &str) -> String {
    let full_name = match name {
        // Colour spaces.
        "G" => "DeviceGray",
        "RGB" => "DeviceRGB",
        "CMYK" => "DeviceCMYK",
        "I" => "Indexed",
        // Filters.
        "AHx" => "ASCIIHexDecode",
        "A85" => "ASCII85Decode",
        "LZW" => "LZWDecode",
        "Fl" => "FlateDecode",
        "RL" => "RunLengthDecode",
        "DCT" => "DCTDecode",
        "CCF" => "CCITTFaxDecode",
        other => other,
    };
    full_name.to_owned()
}
1683
1684#[cfg(test)]
1685mod tests {
1686 use super::*;
1687
/// Signed integers and reals (including a bare leading dot) tokenize to
/// `Token::Integer` / `Token::Number`, after which the tokenizer reports
/// end of input.
#[test]
fn test_tokenize_numbers() {
    let mut tokenizer = ContentTokenizer::new(b"123 -45 3.14159 -0.5 .5");

    let expected = [
        Token::Integer(123),
        Token::Integer(-45),
        Token::Number(3.14159),
        Token::Number(-0.5),
        Token::Number(0.5),
    ];
    for want in expected {
        assert_eq!(tokenizer.next_token().unwrap(), Some(want));
    }

    // Exhausted input is `Ok(None)`, not an error.
    assert_eq!(tokenizer.next_token().unwrap(), None);
}
1703
/// Literal strings: plain text, a `\n` escape, and balanced nested
/// parentheses are all decoded into the expected bytes.
#[test]
fn test_tokenize_strings() {
    let mut tokenizer = ContentTokenizer::new(b"(Hello World) (Hello\\nWorld) (Nested (paren))");

    let expected: [&[u8]; 3] = [b"Hello World", b"Hello\nWorld", b"Nested (paren)"];
    for bytes in expected {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(bytes.to_vec()))
        );
    }
}
1722
/// Hex strings decode to the same bytes whether or not the digit pairs are
/// separated by whitespace.
#[test]
fn test_tokenize_hex_strings() {
    let mut tokenizer = ContentTokenizer::new(b"<48656C6C6F> <48 65 6C 6C 6F>");

    // Both spellings must decode to "Hello".
    for _ in 0..2 {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::HexString(b"Hello".to_vec()))
        );
    }
}
1737
/// Names are returned without the leading slash and with `#xx` hex escapes
/// decoded.
#[test]
fn test_tokenize_names() {
    let mut tokenizer = ContentTokenizer::new(b"/Name /Name#20with#20spaces /A#42C");

    for want in ["Name", "Name with spaces", "ABC"] {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Name(want.to_string()))
        );
    }
}
1756
/// Bare keywords are emitted as `Token::Operator`, in input order.
#[test]
fn test_tokenize_operators() {
    let mut tokenizer = ContentTokenizer::new(b"BT Tj ET q Q");

    for keyword in ["BT", "Tj", "ET", "q", "Q"] {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Operator(keyword.to_string()))
        );
    }
}
1783
/// A minimal text object parses into exactly five operations: begin text,
/// set font, move, show text, end text.
#[test]
fn test_parse_text_operators() {
    let parsed = ContentParser::parse(b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET").unwrap();

    assert_eq!(
        parsed,
        vec![
            ContentOperation::BeginText,
            ContentOperation::SetFont("F1".to_string(), 12.0),
            ContentOperation::MoveText(100.0, 200.0),
            ContentOperation::ShowText(b"Hello World".to_vec()),
            ContentOperation::EndText,
        ]
    );
}
1802
/// A save / transform / line-width / rectangle / stroke / restore sequence
/// parses into exactly six operations with the expected operands.
#[test]
fn test_parse_graphics_operators() {
    let parsed = ContentParser::parse(b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q").unwrap();

    assert_eq!(
        parsed,
        vec![
            ContentOperation::SaveGraphicsState,
            ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
            ContentOperation::SetLineWidth(2.0),
            ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0),
            ContentOperation::Stroke,
            ContentOperation::RestoreGraphicsState,
        ]
    );
}
1822
/// The non-stroking gray, RGB and CMYK operators (`g`, `rg`, `k`) parse
/// into exactly three operations.
#[test]
fn test_parse_color_operators() {
    let parsed = ContentParser::parse(b"0.5 g 1 0 0 rg 0 0 0 1 k").unwrap();

    assert_eq!(
        parsed,
        vec![
            ContentOperation::SetNonStrokingGray(0.5),
            ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0),
            ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0),
        ]
    );
}
1839
1840 mod comprehensive_tests {
1842 use super::*;
1843
/// Exercises every text-state and text-positioning operator in one stream
/// and checks the first thirteen parsed operations in order.
#[test]
fn test_all_text_operators() {
    let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
    let operators = ContentParser::parse(content).unwrap();

    let expected = [
        ContentOperation::BeginText,
        ContentOperation::SetCharSpacing(5.0),
        ContentOperation::SetWordSpacing(10.0),
        ContentOperation::SetHorizontalScaling(120.0),
        ContentOperation::SetLeading(15.0),
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::SetTextRenderMode(1),
        ContentOperation::SetTextRise(5.0),
        ContentOperation::MoveText(100.0, 200.0),
        ContentOperation::MoveTextSetLeading(50.0, 150.0),
        ContentOperation::NextLine,
        ContentOperation::ShowText(b"Hello".to_vec()),
        ContentOperation::EndText,
    ];
    // Only the leading operations are pinned; the total count is not asserted.
    assert_eq!(&operators[..expected.len()], &expected[..]);
}
1870
/// Exercises the graphics-state operators (`q Q cm w J j M gs i ri`) and
/// checks the first ten parsed operations in order.
#[test]
fn test_all_graphics_state_operators() {
    let content = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
    let operators = ContentParser::parse(content).unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
        ContentOperation::SetLineWidth(2.0),
        ContentOperation::SetLineCap(1),
        ContentOperation::SetLineJoin(2),
        ContentOperation::SetMiterLimit(10.0),
        ContentOperation::SetGraphicsStateParams("GS1".to_string()),
        ContentOperation::SetFlatness(0.5),
        ContentOperation::SetIntent("Perceptual".to_string()),
    ];
    // Only the leading operations are pinned; the total count is not asserted.
    assert_eq!(&operators[..expected.len()], &expected[..]);
}
1897
/// Exercises the path-construction operators (`m l c v y h re`) and checks
/// the first seven parsed operations, including operand order.
#[test]
fn test_all_path_construction_operators() {
    let content = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
    let operators = ContentParser::parse(content).unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0),
        ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0),
        ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0),
        ContentOperation::ClosePath,
        ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0),
    ];
    // Only the leading operations are pinned; the total count is not asserted.
    assert_eq!(&operators[..expected.len()], &expected[..]);
}
1923
/// Exercises the path-painting and clipping operators and checks the first
/// twelve parsed operations. Both `f` and `F` are expected to map to `Fill`.
#[test]
fn test_all_path_painting_operators() {
    let operators = ContentParser::parse(b"S s f F f* B B* b b* n W W*").unwrap();

    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
        ContentOperation::EndPath,
        ContentOperation::Clip,
        ContentOperation::ClipEvenOdd,
    ];
    // Only the leading operations are pinned; the total count is not asserted.
    assert_eq!(&operators[..expected.len()], &expected[..]);
}
1942
/// Exercises the colour-space, gray, RGB, CMYK and shading operators and
/// checks the first nine parsed operations in order.
#[test]
fn test_all_color_operators() {
    let content = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
    let operators = ContentParser::parse(content).unwrap();

    let expected = [
        ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string()),
        ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string()),
        ContentOperation::SetStrokingGray(0.7),
        ContentOperation::SetNonStrokingGray(0.4),
        ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0),
        ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0),
        ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5),
        ContentOperation::ShadingFill("Shade1".to_string()),
    ];
    // Only the leading operations are pinned; the total count is not asserted.
    assert_eq!(&operators[..expected.len()], &expected[..]);
}
1980
/// Exercises `Do`, `BMC`/`EMC`, `MP` and `BX`/`EX` and checks the first six
/// parsed operations in order.
#[test]
fn test_xobject_and_marked_content_operators() {
    let operators = ContentParser::parse(b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX").unwrap();

    let expected = [
        ContentOperation::PaintXObject("Image1".to_string()),
        ContentOperation::BeginMarkedContent("MC1".to_string()),
        ContentOperation::EndMarkedContent,
        ContentOperation::DefineMarkedContentPoint("MP1".to_string()),
        ContentOperation::BeginCompatibility,
        ContentOperation::EndCompatibility,
    ];
    // Only the leading operations are pinned; the total count is not asserted.
    assert_eq!(&operators[..expected.len()], &expected[..]);
}
2003
/// A text object nested inside a saved/transformed graphics state parses
/// into exactly eight operations.
#[test]
fn test_complex_content_stream() {
    let parsed =
        ContentParser::parse(b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q")
            .unwrap();

    assert_eq!(
        parsed,
        vec![
            ContentOperation::SaveGraphicsState,
            ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0),
            ContentOperation::BeginText,
            ContentOperation::SetFont("F1".to_string(), 12.0),
            ContentOperation::MoveText(0.0, 0.0),
            ContentOperation::ShowText(b"Complex".to_vec()),
            ContentOperation::EndText,
            ContentOperation::RestoreGraphicsState,
        ]
    );
}
2028
/// Runs of spaces, tabs, CR and LF between and around tokens are skipped
/// and never produce tokens of their own.
#[test]
fn test_tokenizer_whitespace_handling() {
    let mut tokenizer = ContentTokenizer::new(b" \t\n\r BT \t\n /F1 12.5 \t Tf \n\r ET ");

    let expected = [
        Token::Operator("BT".to_string()),
        Token::Name("F1".to_string()),
        Token::Number(12.5),
        Token::Operator("Tf".to_string()),
        Token::Operator("ET".to_string()),
    ];
    for want in expected {
        assert_eq!(tokenizer.next_token().unwrap(), Some(want));
    }

    // Trailing whitespace must lead to a clean end of input.
    assert_eq!(tokenizer.next_token().unwrap(), None);
}
2053
/// Unusual numeric spellings: leading dot, explicit sign before a dot, and
/// a trailing dot are all accepted; anything containing a dot is a
/// `Token::Number`.
#[test]
fn test_tokenizer_edge_cases() {
    let mut tokenizer = ContentTokenizer::new(b"0 .5 -.5 +.5 123. .123 1.23 -1.23");

    let expected = [
        Token::Integer(0),
        Token::Number(0.5),
        Token::Number(-0.5),
        Token::Number(0.5),
        Token::Number(123.0),
        Token::Number(0.123),
        Token::Number(1.23),
        Token::Number(-1.23),
    ];
    for want in expected {
        assert_eq!(tokenizer.next_token().unwrap(), Some(want));
    }
}
2069
/// Backslash escapes inside literal strings: `\\`, `\)`, `\(`, `\n`, `\t`,
/// `\r`, `\b` and `\f` decode to the corresponding bytes.
///
/// The input also contains two trailing octal-style strings; they are not
/// asserted on here.
#[test]
fn test_string_parsing_edge_cases() {
    let input = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
    let mut tokenizer = ContentTokenizer::new(input);

    let expected: [&[u8]; 10] = [
        b"Simple",
        b"With\\backslash",
        b"With)paren",
        b"With\newline",
        b"With\ttab",
        b"With\rcarriage",
        b"With\x08backspace",
        b"With\x0Cformfeed",
        b"With(leftparen",
        b"With)rightparen",
    ];
    for bytes in expected {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(bytes.to_vec()))
        );
    }
}
2116
/// Hex strings with and without internal whitespace, and one with an odd
/// digit count, whose final digit is expected to be padded with a zero
/// nibble (`5` -> `0x50`).
#[test]
fn test_hex_string_parsing() {
    let mut tokenizer =
        ContentTokenizer::new(b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>");

    let expected: [&[u8]; 4] = [b"Hello", b"Hello", b"HelloW", b"Hello\x50"];
    for bytes in expected {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::HexString(bytes.to_vec()))
        );
    }
}
2139
/// `#xx` escapes in names decode to space, `#`, `/`, and also work as the
/// very first character of the name.
#[test]
fn test_name_parsing_edge_cases() {
    let mut tokenizer = ContentTokenizer::new(
        b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName",
    );

    let expected = [
        "Name",
        "Name with spaces",
        "Name#with#hash",
        "Name/with/slash",
        "EmptyName",
    ];
    for want in expected {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Name(want.to_string()))
        );
    }
}
2166
/// Repeated operand-less operators are each emitted individually; the
/// parser does not collapse or reject runs of the same operator.
#[test]
fn test_operator_parsing_edge_cases() {
    let parsed = ContentParser::parse(b"q q q Q Q Q BT BT ET ET").unwrap();

    assert_eq!(
        parsed,
        vec![
            ContentOperation::SaveGraphicsState,
            ContentOperation::SaveGraphicsState,
            ContentOperation::SaveGraphicsState,
            ContentOperation::RestoreGraphicsState,
            ContentOperation::RestoreGraphicsState,
            ContentOperation::RestoreGraphicsState,
            ContentOperation::BeginText,
            ContentOperation::BeginText,
            ContentOperation::EndText,
            ContentOperation::EndText,
        ]
    );
}
2184
/// `Td` with a single operand is malformed; parsing must still succeed and
/// keep the valid `Tj` that follows it.
#[test]
fn test_error_handling_insufficient_operands() {
    let ops = ContentParser::parse(b"100 Td (kept) Tj").expect("recovers from bad Td");

    let kept_survived = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::ShowText(t) if t == b"kept"));
    assert!(
        kept_survived,
        "valid Tj after the malformed Td must survive: {ops:?}"
    );
}
2199
/// An unknown operator must not abort parsing: the well-formed `m` after it
/// still has to appear in the result.
#[test]
fn test_error_handling_invalid_operator() {
    let ops =
        ContentParser::parse(b"100 200 INVALID 10 20 m").expect("recovers from unknown operator");

    let move_to_survived = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::MoveTo(_, _)));
    assert!(
        move_to_survived,
        "valid MoveTo after the unknown operator must survive: {ops:?}"
    );
}
2212
/// An unterminated literal string must not make the tokenizer panic.
///
/// Whether the tokenizer reports an error or returns the partial string is
/// deliberately left unspecified, so the result itself is not inspected.
/// (The previous `is_ok() || is_err()` assertion was a tautology that
/// checked exactly the same thing while looking like a real assertion.)
#[test]
fn test_error_handling_malformed_string() {
    let mut tokenizer = ContentTokenizer::new(b"(Unclosed string");

    // Reaching the end of this function without a panic is the whole test.
    let _ = tokenizer.next_token();
}
2223
/// A non-hex digit (`G`) inside a hex string is reported as an error.
#[test]
fn test_error_handling_malformed_hex_string() {
    let mut tokenizer = ContentTokenizer::new(b"<48656C6C6G>");
    let outcome = tokenizer.next_token();

    assert!(outcome.is_err());
}
2231
/// A `#` escape in a name followed by non-hex digits is reported as an
/// error.
#[test]
fn test_error_handling_malformed_name() {
    let mut tokenizer = ContentTokenizer::new(b"/Name#GG");
    let outcome = tokenizer.next_token();

    assert!(outcome.is_err());
}
2239
/// An empty stream parses successfully and yields no operations.
#[test]
fn test_empty_content_stream() {
    let parsed = ContentParser::parse(b"").unwrap();

    assert_eq!(parsed.len(), 0);
}
2246
/// A stream consisting only of whitespace parses successfully and yields
/// no operations.
#[test]
fn test_whitespace_only_content_stream() {
    let parsed = ContentParser::parse(b" \t\n\r ").unwrap();

    assert_eq!(parsed.len(), 0);
}
2253
/// Integer operands are accepted where the operation stores floats and are
/// converted to the equivalent `f32` values.
#[test]
fn test_mixed_integer_and_real_operands() {
    let parsed = ContentParser::parse(b"100 200 m 150 200 l").unwrap();

    assert_eq!(
        parsed,
        vec![
            ContentOperation::MoveTo(100.0, 200.0),
            ContentOperation::LineTo(150.0, 200.0),
        ]
    );
}
2264
/// Negative integer and real operands keep their sign through parsing.
#[test]
fn test_negative_operands() {
    let parsed = ContentParser::parse(b"-100 -200 Td -50.5 -75.2 TD").unwrap();

    assert_eq!(
        parsed,
        vec![
            ContentOperation::MoveText(-100.0, -200.0),
            ContentOperation::MoveTextSetLeading(-50.5, -75.2),
        ]
    );
}
2277
/// Reals with many digits parse to the same `f32` value that the Rust
/// literal of the same spelling produces.
#[test]
fn test_large_numbers() {
    let parsed = ContentParser::parse(b"999999.999999 -999999.999999 m").unwrap();

    assert_eq!(
        parsed,
        vec![ContentOperation::MoveTo(999999.999999, -999999.999999)]
    );
}
2289
/// Despite its name, this input contains no exponent notation: it only
/// checks that ordinary decimal reals are parsed as `MoveTo` operands.
// NOTE(review): presumably kept as a placeholder because PDF numbers have
// no exponent form; confirm against the PDF specification.
#[test]
fn test_scientific_notation() {
    let parsed = ContentParser::parse(b"123.45 -456.78 m").unwrap();

    assert_eq!(parsed, vec![ContentOperation::MoveTo(123.45, -456.78)]);
}
2299
/// `TJ` given a plain string instead of an array is malformed; parsing must
/// still succeed and keep the valid `Tj` that follows.
#[test]
fn test_show_text_array_complex() {
    let ops = ContentParser::parse(b"(Hello) TJ (kept) Tj").expect("recovers from malformed TJ");

    let kept_survived = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::ShowText(t) if t == b"kept"));
    assert!(
        kept_survived,
        "valid Tj after the malformed TJ must survive: {ops:?}"
    );
}
2313
/// `d` with only an integer operand (no dash array) is malformed; parsing
/// must still succeed and keep the `m` that follows.
#[test]
fn test_dash_pattern_empty() {
    let ops = ContentParser::parse(b"0 d 10 20 m").expect("recovers from malformed d");

    let move_to_survived = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::MoveTo(_, _)));
    assert!(
        move_to_survived,
        "valid MoveTo after the malformed dash op must survive: {ops:?}"
    );
}
2326
/// `d` with only a real operand (no dash array) is malformed; parsing must
/// still succeed and keep the `m` that follows.
#[test]
fn test_dash_pattern_complex() {
    let ops = ContentParser::parse(b"2.5 d 10 20 m").expect("recovers from malformed d");

    let move_to_survived = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::MoveTo(_, _)));
    assert!(
        move_to_survived,
        "valid MoveTo after the malformed dash op must survive: {ops:?}"
    );
}
2338
/// `pop_array` drains the whole array from the operand stack, both when the
/// closing `ArrayEnd` marker is present and when it is missing.
#[test]
fn test_pop_array_removes_array_end() {
    let parser = ContentParser::new(b"");

    // Properly terminated array: markers are dropped, three items returned.
    let mut closed = vec![
        Token::ArrayStart,
        Token::Integer(1),
        Token::Integer(2),
        Token::Integer(3),
        Token::ArrayEnd,
    ];
    let items = parser.pop_array(&mut closed).unwrap();
    assert_eq!(items.len(), 3);
    assert!(closed.is_empty());

    // No `ArrayEnd` on the stack: the items are still collected.
    let mut unclosed = vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)];
    let items = parser.pop_array(&mut unclosed).unwrap();
    assert_eq!(items.len(), 2);
    assert!(unclosed.is_empty());
}
2362
/// `parse_dash_array` converts integer and real tokens to `f32` and accepts
/// an empty token list.
#[test]
fn test_dash_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    let mixed = vec![Token::Number(3.0), Token::Integer(2)];
    let dashes = parser.parse_dash_array(mixed).unwrap();
    assert_eq!(dashes, vec![3.0, 2.0]);

    let nothing = vec![];
    let dashes = parser.parse_dash_array(nothing).unwrap();
    let no_dashes: Vec<f32> = Vec::new();
    assert_eq!(dashes, no_dashes);
}
2379
/// `parse_text_array` accepts alternating strings and numeric adjustments
/// and returns one element per input token.
#[test]
fn test_text_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    let tokens = vec![
        Token::String(b"Hello".to_vec()),
        Token::Number(-100.0),
        Token::String(b"World".to_vec()),
    ];
    let elements = parser.parse_text_array(tokens).unwrap();

    assert_eq!(elements.len(), 3);
}
2394
/// A `BI ... ID ... EI` sequence becomes a single `InlineImage` operation
/// whose abbreviated keys and colour-space name have been expanded. The
/// image data itself is not inspected.
#[test]
fn test_inline_image_handling() {
    let operators =
        ContentParser::parse(b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI").unwrap();
    assert_eq!(operators.len(), 1);

    if let ContentOperation::InlineImage { params, .. } = &operators[0] {
        let expected = [
            ("Width", Object::Integer(100)),
            ("Height", Object::Integer(100)),
            ("BitsPerComponent", Object::Integer(8)),
            ("ColorSpace", Object::Name("DeviceRGB".to_string())),
        ];
        for (key, want) in &expected {
            assert_eq!(params.get(*key), Some(want));
        }
    } else {
        panic!("Expected InlineImage operation");
    }
}
2416
/// Same as the basic inline-image test, but with a `/F /AHx` entry: both
/// the key and the filter abbreviation must be expanded.
#[test]
fn test_inline_image_with_filter() {
    let operators =
        ContentParser::parse(b"BI /W 50 /H 50 /CS /G /BPC 1 /F /AHx ID 00FF00FF EI").unwrap();
    assert_eq!(operators.len(), 1);

    if let ContentOperation::InlineImage { params, .. } = &operators[0] {
        let expected = [
            ("Width", Object::Integer(50)),
            ("Height", Object::Integer(50)),
            ("ColorSpace", Object::Name("DeviceGray".to_string())),
            ("BitsPerComponent", Object::Integer(1)),
            ("Filter", Object::Name("ASCIIHexDecode".to_string())),
        ];
        for (key, want) in &expected {
            assert_eq!(params.get(*key), Some(want));
        }
    } else {
        panic!("Expected InlineImage operation");
    }
}
2440
/// Parses 1000 `m` operations and checks both the count and that parsing
/// finished in under 100 ms.
// NOTE(review): the wall-clock bound can fail on slow or heavily loaded
// machines and in unoptimised builds; consider moving it to a benchmark.
#[test]
fn test_content_parser_performance() {
    let content: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i + 1).into_bytes())
        .collect();

    let started = std::time::Instant::now();
    let operators = ContentParser::parse(&content).unwrap();
    let elapsed = started.elapsed();

    assert_eq!(operators.len(), 1000);
    assert!(elapsed.as_millis() < 100);
}
2455
/// Tokenizes 2000 integers and checks both the token count and that
/// tokenizing finished in under 50 ms.
// NOTE(review): the wall-clock bound can fail on slow or heavily loaded
// machines and in unoptimised builds; consider moving it to a benchmark.
#[test]
fn test_tokenizer_performance() {
    let input: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} ", i, i + 1).into_bytes())
        .collect();

    // Tokenizer construction is part of the timed region.
    let started = std::time::Instant::now();
    let mut tokenizer = ContentTokenizer::new(&input);
    let mut seen = 0;
    loop {
        match tokenizer.next_token().unwrap() {
            Some(_) => seen += 1,
            None => break,
        }
    }
    let elapsed = started.elapsed();

    assert_eq!(seen, 2000);
    assert!(elapsed.as_millis() < 50);
}
2474
/// Parses 10 000 six-operand `c` operations and checks that every one of
/// them comes back as a `CurveTo`.
#[test]
fn test_memory_usage_large_content() {
    let mut content = Vec::new();
    for i in 0..10000 {
        content.extend_from_slice(
            format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5)
                .as_bytes(),
        );
    }

    let operators = ContentParser::parse(&content).unwrap();
    assert_eq!(operators.len(), 10000);

    for op in operators {
        // The result of `matches!` used to be discarded, so this loop
        // verified nothing; it now actually asserts on each operation.
        assert!(matches!(op, ContentOperation::CurveTo(_, _, _, _, _, _)));
    }
}
2493
/// Ten threads parse the same shared byte buffer at once; each must get the
/// same five-operation result.
#[test]
fn test_concurrent_parsing() {
    use std::sync::Arc;
    use std::thread;

    let shared = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());

    let mut workers = Vec::with_capacity(10);
    for _ in 0..10 {
        let input = Arc::clone(&shared);
        workers.push(thread::spawn(move || ContentParser::parse(&input).unwrap()));
    }

    for worker in workers {
        let operators = worker.join().unwrap();
        assert_eq!(operators.len(), 5);
        assert_eq!(operators[0], ContentOperation::BeginText);
        assert_eq!(operators[4], ContentOperation::EndText);
    }
}
2514
/// Hex-string corner cases: empty `<>`, an odd number of digits (the last
/// digit is padded with a zero nibble), and whitespace between digit pairs.
#[test]
fn test_tokenizer_hex_string_edge_cases() {
    // Empty hex string.
    let first = ContentTokenizer::new(b"<>").next_token().unwrap().unwrap();
    if let Token::HexString(data) = first {
        assert!(data.is_empty());
    } else {
        panic!("Expected empty hex string");
    }

    // Odd digit count: "123" is read as 0x12 0x30.
    let second = ContentTokenizer::new(b"<123>").next_token().unwrap().unwrap();
    if let Token::HexString(data) = second {
        assert_eq!(data, vec![0x12, 0x30]);
    } else {
        panic!("Expected hex string with odd digits");
    }

    // Space, tab and newline inside the brackets are ignored.
    let third = ContentTokenizer::new(b"<12 34\t56\n78>")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::HexString(data) = third {
        assert_eq!(data, vec![0x12, 0x34, 0x56, 0x78]);
    } else {
        panic!("Expected hex string with whitespace");
    }
}
2542
/// Literal-string escapes: the single-character escapes and three-digit
/// octal escapes decode to the expected byte values.
#[test]
fn test_tokenizer_literal_string_escape_sequences() {
    // Single-character escapes, in the order they appear in the input.
    let mut tokenizer = ContentTokenizer::new(b"(\\n\\r\\t\\b\\f\\(\\)\\\\)");
    let token = tokenizer.next_token().unwrap().unwrap();
    if let Token::String(data) = token {
        assert_eq!(
            data,
            vec![b'\n', b'\r', b'\t', 0x08, 0x0C, b'(', b')', b'\\']
        );
    } else {
        panic!("Expected string with escapes");
    }

    // Octal escapes: \101 = 'A', \040 = ' ', \377 = 255.
    let mut tokenizer = ContentTokenizer::new(b"(\\101\\040\\377)");
    let token = tokenizer.next_token().unwrap().unwrap();
    if let Token::String(data) = token {
        assert_eq!(data, vec![b'A', b' ', 255]);
    } else {
        panic!("Expected string with octal escapes");
    }
}
2566
/// Balanced parentheses inside a literal string are kept as content, at
/// one and at several levels of nesting.
#[test]
fn test_tokenizer_nested_parentheses() {
    let mut tokenizer = ContentTokenizer::new(b"(outer (inner) text)");
    let token = tokenizer.next_token().unwrap().unwrap();
    if let Token::String(data) = token {
        assert_eq!(data, b"outer (inner) text");
    } else {
        panic!("Expected string with nested parentheses");
    }

    let mut tokenizer = ContentTokenizer::new(b"(level1 (level2 (level3) back2) back1)");
    let token = tokenizer.next_token().unwrap().unwrap();
    if let Token::String(data) = token {
        assert_eq!(data, b"level1 (level2 (level3) back2) back1");
    } else {
        panic!("Expected string with deep nesting");
    }
}
2588
/// `#xx` escapes in names decode to spaces and to delimiter characters
/// (`/ ( ) < >`) that could not appear in a name literally.
#[test]
fn test_tokenizer_name_hex_escapes() {
    let mut tokenizer = ContentTokenizer::new(b"/Name#20With#20Spaces");
    let token = tokenizer.next_token().unwrap().unwrap();
    if let Token::Name(name) = token {
        assert_eq!(name, "Name With Spaces");
    } else {
        panic!("Expected name with hex escapes");
    }

    let mut tokenizer = ContentTokenizer::new(b"/Special#2F#28#29#3C#3E");
    let token = tokenizer.next_token().unwrap().unwrap();
    if let Token::Name(name) = token {
        assert_eq!(name, "Special/()<>");
    } else {
        panic!("Expected name with special character escapes");
    }
}
2606
/// Numeric corner cases: a large integer (2147483647), a very small real,
/// and a real written with a bare leading dot.
#[test]
fn test_tokenizer_number_edge_cases() {
    let mut tokenizer = ContentTokenizer::new(b"2147483647");
    let token = tokenizer.next_token().unwrap().unwrap();
    if let Token::Integer(n) = token {
        assert_eq!(n, 2147483647);
    } else {
        panic!("Expected large integer");
    }

    // Reals are compared with an epsilon rather than for exact equality.
    let mut tokenizer = ContentTokenizer::new(b"0.00001");
    let token = tokenizer.next_token().unwrap().unwrap();
    if let Token::Number(n) = token {
        assert!((n - 0.00001).abs() < f32::EPSILON);
    } else {
        panic!("Expected small float");
    }

    let mut tokenizer = ContentTokenizer::new(b".5");
    let token = tokenizer.next_token().unwrap().unwrap();
    if let Token::Number(n) = token {
        assert!((n - 0.5).abs() < f32::EPSILON);
    } else {
        panic!("Expected float starting with dot");
    }
}
2633
/// A closed, filled four-point path parses into exactly six operations:
/// one move, three lines, close, fill.
#[test]
fn test_parser_complex_path_operations() {
    let parsed = ContentParser::parse(b"100 200 m 150 200 l 150 250 l 100 250 l h f").unwrap();

    assert_eq!(
        parsed,
        vec![
            ContentOperation::MoveTo(100.0, 200.0),
            ContentOperation::LineTo(150.0, 200.0),
            ContentOperation::LineTo(150.0, 250.0),
            ContentOperation::LineTo(100.0, 250.0),
            ContentOperation::ClosePath,
            ContentOperation::Fill,
        ]
    );
}
2647
/// A single `c` operator parses into one `CurveTo` whose six operands are
/// kept in source order.
///
/// The earlier version only checked that the values were finite and that
/// the first control point lay in a broad range, so swapped or wrong
/// operands would have gone unnoticed; the exact operation is asserted now.
#[test]
fn test_parser_bezier_curves() {
    let content = b"100 100 150 50 200 150 c";
    let operators = ContentParser::parse(content).unwrap();

    assert_eq!(operators.len(), 1);
    match &operators[0] {
        ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3) => {
            assert_eq!(
                (*x1, *y1, *x2, *y2, *x3, *y3),
                (100.0, 100.0, 150.0, 50.0, 200.0, 150.0)
            );
        }
        _ => panic!("Expected CurveTo operation"),
    }
}
2669
/// Five colour-related operators (`g`, `rg`, `k`, `cs`, `sc`) must produce
/// five operations; the payloads of the first two are verified.
#[test]
fn test_parser_color_operations() {
    let stream = b"0.5 g 1 0 0 rg 0 1 0 1 k /DeviceRGB cs 0.2 0.4 0.6 sc";
    let ops = ContentParser::parse(stream).unwrap();
    assert_eq!(ops.len(), 5);

    // `0.5 g`
    if let ContentOperation::SetNonStrokingGray(level) = &ops[0] {
        assert_eq!(*level, 0.5);
    } else {
        panic!("Expected SetNonStrokingGray");
    }

    // `1 0 0 rg`
    if let ContentOperation::SetNonStrokingRGB(red, green, blue) = &ops[1] {
        assert_eq!((*red, *green, *blue), (1.0, 0.0, 0.0));
    } else {
        panic!("Expected SetNonStrokingRGB");
    }
}
2687
/// A text object containing `Tm`, two `TL` and two `'` operators must parse
/// into seven operations, bracketed by `BeginText` / `EndText`, with the
/// text matrix operands preserved in order.
#[test]
fn test_parser_text_positioning_advanced() {
    let stream = b"BT 1 0 0 1 100 200 Tm 0 TL 10 TL (Line 1) ' (Line 2) ' ET";
    let ops = ContentParser::parse(stream).unwrap();
    assert_eq!(ops.len(), 7);

    assert_eq!(ops[0], ContentOperation::BeginText);
    assert_eq!(ops[6], ContentOperation::EndText);

    if let ContentOperation::SetTextMatrix(a, b, c, d, e, f) = &ops[1] {
        let matrix = (*a, *b, *c, *d, *e, *f);
        assert_eq!(matrix, (1.0, 0.0, 0.0, 1.0, 100.0, 200.0));
    } else {
        panic!("Expected SetTextMatrix");
    }
}
2703
/// A `q` ... `Q` pair wrapping `cm`, `w`, `J`, `j` and `M` must parse into
/// seven operations, with the `cm` operands preserved in order.
#[test]
fn test_parser_graphics_state_operations() {
    let stream = b"q 2 0 0 2 100 100 cm 5 w 1 J 2 j 10 M Q";
    let ops = ContentParser::parse(stream).unwrap();
    assert_eq!(ops.len(), 7);

    assert_eq!(ops[0], ContentOperation::SaveGraphicsState);
    assert_eq!(ops[6], ContentOperation::RestoreGraphicsState);

    if let ContentOperation::SetTransformMatrix(a, b, c, d, e, f) = &ops[1] {
        let matrix = (*a, *b, *c, *d, *e, *f);
        assert_eq!(matrix, (2.0, 0.0, 0.0, 2.0, 100.0, 100.0));
    } else {
        panic!("Expected SetTransformMatrix");
    }
}
2719
/// Three consecutive `Do` operators must each become a `PaintXObject`
/// carrying the name operand that preceded it.
#[test]
fn test_parser_xobject_operations() {
    let ops = ContentParser::parse(b"/Image1 Do /Form2 Do /Pattern3 Do").unwrap();
    let expected_names = ["Image1", "Form2", "Pattern3"];

    assert_eq!(ops.len(), 3);
    for (op, expected_name) in ops.iter().zip(expected_names.iter()) {
        if let ContentOperation::PaintXObject(name) = op {
            assert_eq!(name, expected_name);
        } else {
            panic!("Expected PaintXObject");
        }
    }
}
2733
/// `BMC` ... `EMC` around a `Tj` must parse into three operations: a
/// `BeginMarkedContent` tagged `P`, the text, and `EndMarkedContent`.
#[test]
fn test_parser_marked_content_operations() {
    let ops = ContentParser::parse(b"/P BMC (Tagged content) Tj EMC").unwrap();
    assert_eq!(ops.len(), 3);

    if let ContentOperation::BeginMarkedContent(tag) = &ops[0] {
        assert_eq!(tag, "P");
    } else {
        panic!("Expected BeginMarkedContent");
    }

    assert_eq!(ops[2], ContentOperation::EndMarkedContent);
}
2746
/// The parser must recover from malformed input instead of failing the
/// whole stream. Three scenarios are covered: an `m` with no operands, an
/// unterminated hex string, and operands that are never followed by an
/// operator.
#[test]
fn test_parser_error_handling_invalid_operators() {
    // Scenario 1: `m` has no operands, but the `l` after it is well-formed.
    let ops = ContentParser::parse(b"m 10 20 l").expect("recovers from operand-less m");
    let kept_line_to = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::LineTo(_, _)));
    assert!(
        kept_line_to,
        "valid LineTo after the operand-less m must survive: {ops:?}"
    );

    // Scenario 2: a valid `Tj` followed by a hex string that is never closed.
    let ops =
        ContentParser::parse(b"(kept) Tj <ABC DEF").expect("recovers, keeping pre-error tokens");
    let kept_text = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::ShowText(t) if t == b"kept"));
    assert!(
        kept_text,
        "text before the unterminated hex must survive: {ops:?}"
    );

    // Scenario 3: dangling operands only; parsing must still succeed.
    let dangling = ContentParser::parse(b"100 200 300");
    assert!(dangling.is_ok());
}
2776
/// Mixed spaces, tabs, CR and LF between tokens must not affect the result:
/// the stream still parses to a single `MoveTo(100, 200)`.
#[test]
fn test_parser_whitespace_tolerance() {
    let ops = ContentParser::parse(b"  \n\t  100  \r\n  200  \t  m  \n").unwrap();

    assert_eq!(ops.len(), 1);
    assert_eq!(ops.first(), Some(&ContentOperation::MoveTo(100.0, 200.0)));
}
2785
/// `%` comments, both mid-stream and trailing, must be skipped, so that the
/// operands on either side of a comment still reach the `m` operator.
#[test]
fn test_tokenizer_comment_handling() {
    let stream = b"100 % This is a comment\n200 m % Another comment";
    let ops = ContentParser::parse(stream).unwrap();

    assert_eq!(ops.len(), 1);
    assert_eq!(ops.first(), Some(&ContentOperation::MoveTo(100.0, 200.0)));
}
2794
/// A non-ASCII byte (`0xFF`) inside a comment must not disturb the
/// operations on either side of that comment.
#[test]
fn test_parser_stream_with_binary_data() {
    let stream = b"100 200 m % Comment with \xFF binary\n150 250 l";
    let ops = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
    ];
    assert_eq!(ops.len(), 2);
    for (actual, wanted) in ops.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
2805
/// A plain `m` / `l` sequence must parse into two operations.
///
/// NOTE(review): despite the test name, this input contains no array
/// tokens, so array parsing itself is not exercised here.
#[test]
fn test_tokenizer_array_parsing() {
    let ops = ContentParser::parse(b"100 200 m 150 250 l").unwrap();

    assert_eq!(ops.len(), 2);
    assert_eq!(ops.first(), Some(&ContentOperation::MoveTo(100.0, 200.0)));
    assert_eq!(ops.get(1), Some(&ContentOperation::LineTo(150.0, 250.0)));
}
2816
/// Two `re` operators must yield two `Rectangle` operations whose four
/// operands appear in stream order.
#[test]
fn test_parser_rectangle_operations() {
    let ops = ContentParser::parse(b"10 20 100 50 re 0 0 200 300 re").unwrap();
    let expected: [(f32, f32, f32, f32); 2] =
        [(10.0, 20.0, 100.0, 50.0), (0.0, 0.0, 200.0, 300.0)];

    assert_eq!(ops.len(), 2);
    for (op, want) in ops.iter().zip(expected.iter()) {
        if let ContentOperation::Rectangle(x, y, width, height) = op {
            assert_eq!((*x, *y, *width, *height), *want);
        } else {
            panic!("Expected Rectangle operation");
        }
    }
}
2836
/// `W` and `W*`, each followed by `n`, must map to `Clip` / `ClipEvenOdd`
/// and `EndPath`; the two `re` operations at indices 0 and 3 are counted
/// but not inspected.
#[test]
fn test_parser_clipping_operations() {
    let stream = b"100 100 50 50 re W n 200 200 75 75 re W* n";
    let ops = ContentParser::parse(stream).unwrap();
    assert_eq!(ops.len(), 6);

    let checks = [
        (1usize, ContentOperation::Clip),
        (2usize, ContentOperation::EndPath),
        (4usize, ContentOperation::ClipEvenOdd),
        (5usize, ContentOperation::EndPath),
    ];
    for (index, wanted) in checks.iter() {
        assert_eq!(&ops[*index], wanted);
    }
}
2848
/// Each of the eight operand-less painting operators must map to its own
/// `ContentOperation` variant, in stream order.
#[test]
fn test_parser_painting_operations() {
    let ops = ContentParser::parse(b"S s f f* B B* b b*").unwrap();

    // Listed in the same order as the operators in the stream above.
    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
    ];

    assert_eq!(ops.len(), 8);
    for (actual, wanted) in ops.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
2864
/// `w`, `J`, `j`, `M` and `d` must produce five operations. The first four
/// are compared exactly; the operation produced by `d` is only counted.
#[test]
fn test_parser_line_style_operations() {
    let ops = ContentParser::parse(b"5 w 1 J 2 j 10 M [ 3 2 ] 0 d").unwrap();
    assert_eq!(ops.len(), 5);

    let leading = [
        ContentOperation::SetLineWidth(5.0),
        ContentOperation::SetLineCap(1),
        ContentOperation::SetLineJoin(2),
        ContentOperation::SetMiterLimit(10.0),
    ];
    // `zip` stops after the four listed entries, leaving index 4 unchecked.
    for (actual, wanted) in ops.iter().zip(leading.iter()) {
        assert_eq!(actual, wanted);
    }
}
2877
/// The text-state operators `Tc`, `Tw`, `Tz`, `Tr` and `Ts` must each map
/// to their variant and carry the operand that preceded them.
#[test]
fn test_parser_text_state_operations() {
    let ops = ContentParser::parse(b"12 Tc 3 Tw 100 Tz 1 Tr 2 Ts").unwrap();

    let expected = [
        ContentOperation::SetCharSpacing(12.0),
        ContentOperation::SetWordSpacing(3.0),
        ContentOperation::SetHorizontalScaling(100.0),
        ContentOperation::SetTextRenderMode(1),
        ContentOperation::SetTextRise(2.0),
    ];

    assert_eq!(ops.len(), 5);
    for (actual, wanted) in ops.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
2890
/// A literal string containing multi-byte (non-ASCII) sequences must still
/// be delivered as a `ShowText` between `BeginText` and `EndText`. Only the
/// payload length (more than five bytes) is checked, not its exact bytes.
#[test]
fn test_parser_unicode_text() {
    let stream = b"BT (Hello \xC2\xA9 World \xE2\x9C\x93) Tj ET";
    let ops = ContentParser::parse(stream).unwrap();
    assert_eq!(ops.len(), 3);

    assert_eq!(ops[0], ContentOperation::BeginText);
    if let ContentOperation::ShowText(bytes) = &ops[1] {
        assert!(bytes.len() > 5);
    } else {
        panic!("Expected ShowText operation");
    }
    assert_eq!(ops[2], ContentOperation::EndText);
}
2906
/// Large-magnitude positive and negative operands must reach `CurveTo`
/// within a tolerance of 0.1. The first point and the x of the last point
/// are the values checked.
#[test]
fn test_parser_stress_test_large_coordinates() {
    let stream = b"999999.999 -999999.999 999999.999 -999999.999 999999.999 -999999.999 c";
    let ops = ContentParser::parse(stream).unwrap();
    assert_eq!(ops.len(), 1);

    // Absolute-difference comparison; exact float equality is not expected.
    let near = |value: f32, target: f32| (value - target).abs() < 0.1;

    if let ContentOperation::CurveTo(x1, y1, _, _, x3, _) = &ops[0] {
        assert!(near(*x1, 999999.999));
        assert!(near(*y1, -999999.999));
        assert!(near(*x3, 999999.999));
    } else {
        panic!("Expected CurveTo operation");
    }
}
2922
/// Both a zero-length stream and a whitespace-only stream must parse
/// successfully into an empty operation list.
#[test]
fn test_parser_empty_content_stream() {
    let blank_streams: [&[u8]; 2] = [b"", b"  \n\t\r  "];

    for stream in blank_streams.iter() {
        let ops = ContentParser::parse(*stream).unwrap();
        assert!(ops.is_empty());
    }
}
2933
/// A comment containing a raw `0xFF` byte must not cost the operations on
/// either side of it: both the `m` before and the `l` after must be present.
#[test]
fn test_tokenizer_error_recovery() {
    let stream = b"100 200 m % Comment with\xFFbinary\n150 250 l";
    let ops = ContentParser::parse(stream).expect("recovers around binary comment");

    let has_move_to = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::MoveTo(_, _)));
    let has_line_to = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::LineTo(_, _)));

    assert!(
        has_move_to,
        "MoveTo before the comment must survive: {ops:?}"
    );
    assert!(
        has_line_to,
        "LineTo after the comment must survive: {ops:?}"
    );
}
2952
/// A `Td` that appears without operands in the middle of a text object must
/// not make the parser drop the valid `Tj` operators on either side of it.
#[test]
fn malformed_operator_does_not_discard_surrounding_text() {
    // The second `Td` directly follows a `Tj`, so it has no operands.
    let content = b"BT /F1 12 Tf 72 700 Td (First line) Tj Td (Second line) Tj ET";
    let ops = ContentParser::parse_content(content)
        .expect("malformed operator must not fail the whole stream");

    // Collect every shown string, in stream order.
    let mut shown: Vec<&Vec<u8>> = Vec::new();
    for op in ops.iter() {
        if let ContentOperation::ShowText(bytes) = op {
            shown.push(bytes);
        }
    }

    assert_eq!(
        shown.len(),
        2,
        "both valid Tj operators must survive the malformed Td"
    );
    assert_eq!(shown[0], b"First line");
    assert_eq!(shown[1], b"Second line");
}
2980
/// Parses 1000 back-to-back `m` operators and checks both the operation
/// count and that parsing finishes in under 200 ms of wall-clock time.
#[test]
fn test_parser_optimization_repeated_operations() {
    // Produces "0 0 m 1 2 m 2 4 m ..." as one contiguous stream.
    let content: Vec<u8> = (0..1000)
        .map(|i| format!("{} {} m ", i, i * 2))
        .collect::<String>()
        .into_bytes();

    // Only the parse call itself is timed, not the stream construction.
    let timer = std::time::Instant::now();
    let ops = ContentParser::parse(&content).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(ops.len(), 1000);
    assert!(elapsed.as_millis() < 200);
}
2996
/// A 10 000-byte literal string must come out of the parser as a
/// `ShowText` payload of the same length.
#[test]
fn test_parser_memory_efficiency_large_strings() {
    let stream = format!("BT ({}) Tj ET", "A".repeat(10000));
    let ops = ContentParser::parse(stream.as_bytes()).unwrap();
    assert_eq!(ops.len(), 3);

    if let ContentOperation::ShowText(bytes) = &ops[1] {
        assert_eq!(bytes.len(), 10000);
    } else {
        panic!("Expected ShowText operation");
    }
}
3012 }
3013
/// A stream of 10 000 `m` operators followed by one `S` must parse
/// successfully and yield more than 10 000 operations.
#[test]
fn test_content_stream_too_large() {
    // Ten thousand "i i m " fragments, then a trailing stroke operator.
    let mut stream: Vec<u8> = (0..10000)
        .map(|i| format!("{} {} m ", i, i))
        .collect::<String>()
        .into_bytes();
    stream.push(b'S');

    let outcome = ContentParser::parse_content(&stream);
    assert!(outcome.is_ok());

    let op_count = outcome.unwrap().len();
    assert!(op_count > 10000);
}
3033
/// If a stream containing an unknown operator parses successfully, the
/// valid `m` that follows it must be present in the result. A parse error
/// is tolerated and leaves nothing to check.
#[test]
fn test_invalid_operator_handling() {
    let outcome = ContentParser::parse_content(b"100 200 INVALID_OP 300 400 m");

    match outcome {
        Ok(ops) => {
            let has_move_to = ops
                .iter()
                .any(|op| matches!(op, ContentOperation::MoveTo(_, _)));
            assert!(has_move_to);
        }
        // Rejecting the stream outright is an accepted outcome here.
        Err(_) => {}
    }
}
3048
/// Input that interleaves array brackets and string parentheses must never
/// panic the parser.
///
/// Either `Ok` or `Err` is acceptable; this test does not pin which one.
/// The previous `assert!(result.is_ok() || result.is_err())` was always
/// true, so it checked nothing beyond what the call itself already does.
#[test]
fn test_nested_arrays_malformed() {
    let content = b"[[(Hello] [World)]] TJ";

    // Returning from this call at all is the assertion: a panic inside the
    // parser fails the test.
    let _ = ContentParser::parse_content(content);
}
3058
/// Backslash escapes inside literal strings must be decoded before the
/// bytes reach `ShowText`: control characters, an escaped backslash,
/// escaped parentheses, and octal escapes of three digits and one digit.
#[test]
fn test_escape_sequences_in_strings() {
    // (literal string as written in the stream, bytes expected after decoding)
    let cases: [(&[u8], &[u8]); 5] = [
        (&b"(\\n\\r\\t)"[..], &b"\n\r\t"[..]),
        (&b"(\\\\)"[..], &b"\\"[..]),
        (&b"(\\(\\))"[..], &b"()"[..]),
        (&b"(\\123)"[..], &b"S"[..]),
        (&b"(\\0)"[..], &b"\0"[..]),
    ];

    for &(input, expected) in cases.iter() {
        // Append a `Tj` so the string becomes the operand of a ShowText.
        let stream = [input, &b" Tj"[..]].concat();

        let outcome = ContentParser::parse_content(&stream);
        assert!(outcome.is_ok());

        let ops = outcome.unwrap();
        match &ops[0] {
            ContentOperation::ShowText(text) => {
                assert_eq!(text, expected, "Failed for input: {:?}", input);
            }
            _ => panic!("Expected ShowText operation"),
        }
    }
}
3086
/// A `BI` ... `ID` ... `EI` inline image with raw binary data must never
/// panic the parser.
///
/// Either `Ok` or `Err` is acceptable; this test does not pin which one.
/// The previous `assert!(result.is_ok() || result.is_err())` was always
/// true, so it checked nothing beyond what the call itself already does.
#[test]
fn test_content_with_inline_images() {
    let content = b"BI /W 10 /H 10 /CS /RGB ID \x00\x01\x02\x03 EI";

    // Returning from this call at all is the assertion: a panic inside the
    // parser fails the test.
    let _ = ContentParser::parse_content(content);
}
3096
/// Operators that appear with no operands at all must never panic the
/// parser.
///
/// Either `Ok` or `Err` is acceptable for each stream; this test does not
/// pin which one. The previous `assert!(result.is_ok() || result.is_err())`
/// was always true, so it checked nothing beyond what the call itself
/// already does.
#[test]
fn test_operator_with_missing_operands() {
    // Each stream consists of a single operator and nothing else.
    let test_cases: [&[u8]; 4] = [b"Tj", b"m", b"rg", b"Tf"];

    for content in test_cases.iter() {
        // Returning from this call at all is the assertion: a panic inside
        // the parser fails the test.
        let _ = ContentParser::parse_content(*content);
    }
}
3113
/// Stray `{` and `}` between operators must not make `next_token` return an
/// error, and the surrounding `q` / `Q` operators must still be produced.
#[test]
fn test_tokenizer_handles_curly_braces() {
    let mut tokenizer = ContentTokenizer::new(b"q { } Q");

    // Drain the tokenizer; an `Err` from `next_token` fails the test via `unwrap`.
    let tokens: Vec<Token> = std::iter::from_fn(|| tokenizer.next_token().unwrap()).collect();

    for name in ["q", "Q"].iter() {
        assert!(tokens.contains(&Token::Operator((*name).to_string())));
    }
}
3132
/// An unmatched `)` between operators must not make `next_token` return an
/// error, and the surrounding `q` / `Q` operators must still be produced.
#[test]
fn test_tokenizer_handles_closing_paren() {
    let mut tokenizer = ContentTokenizer::new(b"q ) Q");

    // Drain the tokenizer; an `Err` from `next_token` fails the test via `unwrap`.
    let tokens: Vec<Token> = std::iter::from_fn(|| tokenizer.next_token().unwrap()).collect();

    for name in ["q", "Q"].iter() {
        assert!(tokens.contains(&Token::Operator((*name).to_string())));
    }
}
3147
/// Inline image data containing the bytes `0x7B` / `0x7D` (`{` / `}`) must
/// parse, produce an `InlineImage`, and leave the trailing `Q` intact.
#[test]
fn test_inline_image_binary_with_curly_braces() {
    let stream = b"BI /W 2 /H 2 /BPC 8 /CS /G ID \x7B\x7D\x00\xFF EI Q";
    let outcome = ContentParser::parse_content(stream);
    assert!(
        outcome.is_ok(),
        "Parsing inline image with curly braces failed: {:?}",
        outcome.err()
    );

    let ops = outcome.unwrap();
    let saw_inline_image = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::InlineImage { .. }));
    assert!(saw_inline_image, "Expected InlineImage operation");

    let saw_restore = ops
        .iter()
        .any(|op| matches!(op, ContentOperation::RestoreGraphicsState));
    assert!(saw_restore, "Expected RestoreGraphicsState after EI");
}
3171
/// Inline image data containing every byte value from `0x00` to `0xFF`
/// must not make parsing fail.
#[test]
fn test_inline_image_binary_with_all_byte_values() {
    let mut stream: Vec<u8> = b"BI /W 16 /H 16 /BPC 8 /CS /G ID ".to_vec();
    // 256 data bytes, one of each possible value, in ascending order.
    stream.extend(0u8..=255);
    stream.extend_from_slice(b" EI Q");

    let outcome = ContentParser::parse_content(&stream);
    assert!(
        outcome.is_ok(),
        "Parsing inline image with all byte values failed: {:?}",
        outcome.err()
    );
}
3190
/// Image data that itself begins with the bytes `0x45 0x49` (ASCII "EI")
/// must still parse and yield an `InlineImage`.
#[test]
fn test_inline_image_ei_detection() {
    let stream = b"BI /W 2 /H 1 /BPC 8 /CS /G ID \x45\x49\x00\n EI Q";
    let outcome = ContentParser::parse_content(stream);
    assert!(outcome.is_ok(), "EI detection failed: {:?}", outcome.err());

    let saw_inline_image = outcome
        .unwrap()
        .iter()
        .any(|op| matches!(op, ContentOperation::InlineImage { .. }));
    assert!(saw_inline_image, "Expected InlineImage operation");
}
3206
/// Runs of stray `{`, `}` and `)` must not stall the tokenizer: it has to
/// reach end-of-input within 100 tokens and still emit `q` and `Q`.
#[test]
fn test_tokenizer_no_infinite_loop_on_consecutive_delimiters() {
    let mut tokenizer = ContentTokenizer::new(b"q {{{}}})))) Q");
    let mut tokens = Vec::new();

    loop {
        match tokenizer.next_token().unwrap() {
            Some(token) => {
                tokens.push(token);
                // Far more tokens than the input could yield means no progress.
                if tokens.len() > 100 {
                    panic!("Tokenizer produced too many tokens — possible infinite loop");
                }
            }
            None => break,
        }
    }

    for name in ["q", "Q"].iter() {
        assert!(tokens.contains(&Token::Operator((*name).to_string())));
    }
}
3224
/// A lone inline image must parse into exactly one `InlineImage` with
/// non-empty data. The stream uses the abbreviated keys `/W`, `/H` and
/// `/BPC`, while the assertions look the values up under `Width`, `Height`
/// and `BitsPerComponent`.
#[test]
fn test_content_parser_inline_image_produces_correct_operation() {
    let stream = b"BI /W 4 /H 4 /BPC 8 /CS /G ID \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F EI";
    let outcome = ContentParser::parse_content(stream);
    assert!(outcome.is_ok(), "Parse failed: {:?}", outcome.err());

    let ops = outcome.unwrap();
    assert_eq!(
        ops.len(),
        1,
        "Expected exactly 1 operation, got {}",
        ops.len()
    );

    match &ops[0] {
        ContentOperation::InlineImage { params, data } => {
            let expected_params = [("Width", 4), ("Height", 4), ("BitsPerComponent", 8)];
            for (key, value) in expected_params.iter() {
                assert_eq!(params.get(*key), Some(&Object::Integer(*value)));
            }
            assert!(!data.is_empty(), "Image data should not be empty");
        }
        other => panic!("Expected InlineImage operation, got {:?}", other),
    }
}
3249
/// `\777` (octal 777 = 0x1FF) does not fit in one byte; the expected token
/// payload is its low eight bits, `0xFF`.
#[test]
fn test_octal_escape_overflow_777() {
    let mut tokenizer = ContentTokenizer::new(b"(\\777)");
    let first = tokenizer.next_token().unwrap().unwrap();

    if let Token::String(bytes) = first {
        assert_eq!(bytes, vec![0xFF]);
    } else {
        panic!("Expected string token");
    }
}
3262
/// `\400` (octal 400 = 0x100) does not fit in one byte; the expected token
/// payload is its low eight bits, `0x00`.
#[test]
fn test_octal_escape_overflow_400() {
    let mut tokenizer = ContentTokenizer::new(b"(\\400)");
    let first = tokenizer.next_token().unwrap().unwrap();

    if let Token::String(bytes) = first {
        assert_eq!(bytes, vec![0x00]);
    } else {
        panic!("Expected string token");
    }
}
3274
/// `\577` (octal 577 = 0x17F) does not fit in one byte; the expected token
/// payload is its low eight bits, `0x7F`.
#[test]
fn test_octal_escape_overflow_577() {
    let mut tokenizer = ContentTokenizer::new(b"(\\577)");
    let first = tokenizer.next_token().unwrap().unwrap();

    if let Token::String(bytes) = first {
        assert_eq!(bytes, vec![0x7F]);
    } else {
        panic!("Expected string token");
    }
}
3286
/// `\377` (octal 377 = 0xFF) is the largest octal escape that fits in one
/// byte and must decode to exactly `0xFF`.
#[test]
fn test_octal_escape_max_valid_377() {
    let mut tokenizer = ContentTokenizer::new(b"(\\377)");
    let first = tokenizer.next_token().unwrap().unwrap();

    if let Token::String(bytes) = first {
        assert_eq!(bytes, vec![0xFF]);
    } else {
        panic!("Expected string token");
    }
}
3297
/// An overflowing escape (`\777`, expected `0xFF`) and an in-range one
/// (`\101`, ASCII `A`) inside the same string must each decode
/// independently, leaving the surrounding literal characters untouched.
#[test]
fn test_octal_escape_overflow_mixed_with_valid() {
    let mut tokenizer = ContentTokenizer::new(b"(A\\777B\\101C)");
    let first = tokenizer.next_token().unwrap().unwrap();

    if let Token::String(bytes) = first {
        // 'A', \777 -> 0xFF, 'B', \101 -> 'A', 'C'
        assert_eq!(bytes, vec![b'A', 0xFF, b'B', b'A', b'C']);
    } else {
        panic!("Expected string token");
    }
}
3310}