1use super::{ParseError, ParseResult};
57use crate::objects::Object;
58use std::collections::HashMap;
59
/// A single parsed content-stream operation together with its operands.
///
/// Each variant notes the operator string that `ContentParser::parse_operator`
/// maps to it. Operand fields are stored in the order the operands are written
/// in the stream unless noted otherwise.
#[derive(Debug, Clone, PartialEq)]
pub enum ContentOperation {
    /// `BT` operator.
    BeginText,

    /// `ET` operator.
    EndText,

    /// `Tc` operator with its numeric operand.
    SetCharSpacing(f32),

    /// `Tw` operator with its numeric operand.
    SetWordSpacing(f32),

    /// `Tz` operator with its numeric operand.
    SetHorizontalScaling(f32),

    /// `TL` operator with its numeric operand.
    SetLeading(f32),

    /// `Tf` operator: font name, then size.
    SetFont(String, f32),

    /// `Tr` operator with its integer operand.
    SetTextRenderMode(i32),

    /// `Ts` operator with its numeric operand.
    SetTextRise(f32),

    /// `Td` operator: `(tx, ty)`.
    MoveText(f32, f32),

    /// `TD` operator: `(tx, ty)`.
    MoveTextSetLeading(f32, f32),

    /// `Tm` operator: the six operands `(a, b, c, d, e, f)`.
    SetTextMatrix(f32, f32, f32, f32, f32, f32),

    /// `T*` operator.
    NextLine,

    /// `Tj` operator with the bytes of its string operand.
    ShowText(Vec<u8>),

    /// `TJ` operator with the elements of its array operand.
    ShowTextArray(Vec<TextElement>),

    /// `'` operator with the bytes of its string operand.
    NextLineShowText(Vec<u8>),

    /// `"` operator: its two numeric operands in stream order, then the string bytes.
    SetSpacingNextLineShowText(f32, f32, Vec<u8>),

    /// `q` operator.
    SaveGraphicsState,

    /// `Q` operator.
    RestoreGraphicsState,

    /// `cm` operator: the six operands `(a, b, c, d, e, f)`.
    SetTransformMatrix(f32, f32, f32, f32, f32, f32),

    /// `w` operator with its numeric operand.
    SetLineWidth(f32),

    /// `J` operator with its integer operand.
    SetLineCap(i32),

    /// `j` operator with its integer operand.
    SetLineJoin(i32),

    /// `M` operator with its numeric operand.
    SetMiterLimit(f32),

    /// `d` operator: dash array, then phase.
    SetDashPattern(Vec<f32>, f32),

    /// `ri` operator with its name operand.
    SetIntent(String),

    /// `i` operator with its numeric operand.
    SetFlatness(f32),

    /// `gs` operator with its name operand.
    SetGraphicsStateParams(String),

    /// `m` operator: `(x, y)`.
    MoveTo(f32, f32),

    /// `l` operator: `(x, y)`.
    LineTo(f32, f32),

    /// `c` operator: `(x1, y1, x2, y2, x3, y3)`.
    CurveTo(f32, f32, f32, f32, f32, f32),

    /// `v` operator: `(x2, y2, x3, y3)`.
    CurveToV(f32, f32, f32, f32),

    /// `y` operator: `(x1, y1, x3, y3)`.
    CurveToY(f32, f32, f32, f32),

    /// `h` operator.
    ClosePath,

    /// `re` operator: `(x, y, width, height)`.
    Rectangle(f32, f32, f32, f32),

    /// `S` operator.
    Stroke,

    /// `s` operator.
    CloseStroke,

    /// `f` or `F` operator.
    Fill,

    /// `f*` operator.
    FillEvenOdd,

    /// `B` operator.
    FillStroke,

    /// `B*` operator.
    FillStrokeEvenOdd,

    /// `b` operator.
    CloseFillStroke,

    /// `b*` operator.
    CloseFillStrokeEvenOdd,

    /// `n` operator.
    EndPath,

    /// `W` operator.
    Clip,
    /// `W*` operator.
    ClipEvenOdd,
    /// `CS` operator with its name operand.
    SetStrokingColorSpace(String),

    /// `cs` operator with its name operand.
    SetNonStrokingColorSpace(String),

    /// `SC` or `SCN` operator with its trailing numeric operands.
    SetStrokingColor(Vec<f32>),

    /// `sc` or `scn` operator with its trailing numeric operands.
    SetNonStrokingColor(Vec<f32>),

    /// `G` operator with its numeric operand.
    SetStrokingGray(f32),

    /// `g` operator with its numeric operand.
    SetNonStrokingGray(f32),

    /// `RG` operator: `(r, g, b)`.
    SetStrokingRGB(f32, f32, f32),

    /// `rg` operator: `(r, g, b)`.
    SetNonStrokingRGB(f32, f32, f32),

    /// `K` operator: `(c, m, y, k)`.
    SetStrokingCMYK(f32, f32, f32, f32),

    /// `k` operator: `(c, m, y, k)`.
    SetNonStrokingCMYK(f32, f32, f32, f32),

    /// `sh` operator with its name operand.
    ShadingFill(String),
    /// Not constructed by the parser code visible in this file; a `BI`
    /// operator yields [`ContentOperation::InlineImage`] instead.
    BeginInlineImage,
    /// Result of a `BI ... ID ... EI` sequence.
    InlineImage {
        /// Image dictionary entries, keyed by their expanded (full) names.
        params: HashMap<String, Object>,
        /// Bytes collected between the `ID` and `EI` tokens.
        data: Vec<u8>,
    },

    /// `Do` operator with its name operand.
    PaintXObject(String),

    /// `BMC` operator with its tag name.
    BeginMarkedContent(String),
    /// `BDC` operator: tag name, then the properties flattened to strings.
    BeginMarkedContentWithProps(String, HashMap<String, String>),
    /// `EMC` operator.
    EndMarkedContent,
    /// `MP` operator with its tag name.
    DefineMarkedContentPoint(String),
    /// `DP` operator: tag name, then the properties flattened to strings.
    DefineMarkedContentPointWithProps(String, HashMap<String, String>),
    /// `BX` operator.
    BeginCompatibility,
    /// `EX` operator.
    EndCompatibility,
}
339
/// One element of the array operand of a `TJ` operator.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// Bytes of a literal or hex string element.
    Text(Vec<u8>),
    /// A numeric element (integers are converted to `f32`).
    Spacing(f32),
}
366
/// Lexical token produced by `ContentTokenizer::next_token`.
#[derive(Debug, Clone, PartialEq)]
pub(super) enum Token {
    /// Real number; the tokenizer emits this for numeric literals containing a `.`.
    Number(f32),
    /// Integer number; emitted for numeric literals without a `.`.
    Integer(i32),
    /// Literal string `( ... )` with escape sequences already decoded.
    String(Vec<u8>),
    /// Hex string `< ... >` decoded to bytes.
    HexString(Vec<u8>),
    /// Name without its leading `/`, with `#xx` escapes decoded.
    Name(String),
    /// Any other bare word; the parser interprets it as an operator.
    Operator(String),
    /// `[`
    ArrayStart,
    /// `]`
    ArrayEnd,
    /// `<<`
    DictStart,
    /// `>>`
    DictEnd,
}
381
/// Splits raw content-stream bytes into [`Token`]s, one per `next_token` call.
pub struct ContentTokenizer<'a> {
    /// The bytes being tokenized.
    input: &'a [u8],
    /// Index into `input` of the next byte to examine.
    position: usize,
}
387
388impl<'a> ContentTokenizer<'a> {
389 pub fn new(input: &'a [u8]) -> Self {
391 Self { input, position: 0 }
392 }
393
394 pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
396 self.skip_whitespace();
397
398 if self.position >= self.input.len() {
399 return Ok(None);
400 }
401
402 let ch = self.input[self.position];
403
404 match ch {
405 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
407
408 b'(' => self.read_literal_string(),
410 b'<' => {
411 if self.peek_next() == Some(b'<') {
412 self.position += 2;
413 Ok(Some(Token::DictStart))
414 } else {
415 self.read_hex_string()
416 }
417 }
418 b'>' => {
419 if self.peek_next() == Some(b'>') {
420 self.position += 2;
421 Ok(Some(Token::DictEnd))
422 } else {
423 Err(ParseError::SyntaxError {
424 position: self.position,
425 message: "Unexpected '>'".to_string(),
426 })
427 }
428 }
429
430 b'[' => {
432 self.position += 1;
433 Ok(Some(Token::ArrayStart))
434 }
435 b']' => {
436 self.position += 1;
437 Ok(Some(Token::ArrayEnd))
438 }
439
440 b'/' => self.read_name(),
442
443 b';' => {
445 self.position += 1;
446 self.next_token() }
448
449 _ => self.read_operator(),
451 }
452 }
453
454 fn skip_whitespace(&mut self) {
455 while self.position < self.input.len() {
456 match self.input[self.position] {
457 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
458 b'%' => self.skip_comment(),
459 _ => break,
460 }
461 }
462 }
463
464 fn skip_comment(&mut self) {
465 while self.position < self.input.len() && self.input[self.position] != b'\n' {
466 self.position += 1;
467 }
468 }
469
470 fn peek_next(&self) -> Option<u8> {
471 if self.position + 1 < self.input.len() {
472 Some(self.input[self.position + 1])
473 } else {
474 None
475 }
476 }
477
478 fn read_number(&mut self) -> ParseResult<Option<Token>> {
479 let start = self.position;
480 let mut has_dot = false;
481
482 if self.position < self.input.len()
484 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
485 {
486 self.position += 1;
487 }
488
489 while self.position < self.input.len() {
491 match self.input[self.position] {
492 b'0'..=b'9' => self.position += 1,
493 b'.' if !has_dot => {
494 has_dot = true;
495 self.position += 1;
496 }
497 _ => break,
498 }
499 }
500
501 let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
502 ParseError::SyntaxError {
503 position: start,
504 message: "Invalid number format".to_string(),
505 }
506 })?;
507
508 if has_dot {
509 let value = num_str
510 .parse::<f32>()
511 .map_err(|_| ParseError::SyntaxError {
512 position: start,
513 message: "Invalid float number".to_string(),
514 })?;
515 Ok(Some(Token::Number(value)))
516 } else {
517 let value = num_str
518 .parse::<i32>()
519 .map_err(|_| ParseError::SyntaxError {
520 position: start,
521 message: "Invalid integer number".to_string(),
522 })?;
523 Ok(Some(Token::Integer(value)))
524 }
525 }
526
527 fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
528 self.position += 1; let mut result = Vec::new();
530 let mut paren_depth = 1;
531 let mut escape = false;
532
533 while self.position < self.input.len() && paren_depth > 0 {
534 let ch = self.input[self.position];
535 self.position += 1;
536
537 if escape {
538 match ch {
539 b'n' => result.push(b'\n'),
540 b'r' => result.push(b'\r'),
541 b't' => result.push(b'\t'),
542 b'b' => result.push(b'\x08'),
543 b'f' => result.push(b'\x0C'),
544 b'(' => result.push(b'('),
545 b')' => result.push(b')'),
546 b'\\' => result.push(b'\\'),
547 b'0'..=b'7' => {
548 self.position -= 1;
550 let octal_value = self.read_octal_escape()?;
551 result.push(octal_value);
552 }
553 _ => result.push(ch), }
555 escape = false;
556 } else {
557 match ch {
558 b'\\' => escape = true,
559 b'(' => {
560 paren_depth += 1;
561 result.push(ch);
562 }
563 b')' => {
564 paren_depth -= 1;
565 if paren_depth > 0 {
566 result.push(ch);
567 }
568 }
569 _ => result.push(ch),
570 }
571 }
572 }
573
574 Ok(Some(Token::String(result)))
575 }
576
577 fn read_octal_escape(&mut self) -> ParseResult<u8> {
578 let mut value = 0u8;
579 let mut count = 0;
580
581 while count < 3 && self.position < self.input.len() {
582 match self.input[self.position] {
583 b'0'..=b'7' => {
584 value = value * 8 + (self.input[self.position] - b'0');
585 self.position += 1;
586 count += 1;
587 }
588 _ => break,
589 }
590 }
591
592 Ok(value)
593 }
594
595 fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
596 self.position += 1; let mut result = Vec::new();
598 let mut nibble = None;
599
600 while self.position < self.input.len() {
601 let ch = self.input[self.position];
602
603 match ch {
604 b'>' => {
605 self.position += 1;
606 if let Some(n) = nibble {
608 result.push(n << 4);
609 }
610 return Ok(Some(Token::HexString(result)));
611 }
612 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
613 let digit = if ch <= b'9' {
614 ch - b'0'
615 } else if ch <= b'F' {
616 ch - b'A' + 10
617 } else {
618 ch - b'a' + 10
619 };
620
621 if let Some(n) = nibble {
622 result.push((n << 4) | digit);
623 nibble = None;
624 } else {
625 nibble = Some(digit);
626 }
627 self.position += 1;
628 }
629 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
630 self.position += 1;
632 }
633 _ => {
634 return Err(ParseError::SyntaxError {
635 position: self.position,
636 message: format!("Invalid character in hex string: {:?}", ch as char),
637 });
638 }
639 }
640 }
641
642 Err(ParseError::SyntaxError {
643 position: self.position,
644 message: "Unterminated hex string".to_string(),
645 })
646 }
647
648 fn read_name(&mut self) -> ParseResult<Option<Token>> {
649 self.position += 1; let start = self.position;
651
652 while self.position < self.input.len() {
653 let ch = self.input[self.position];
654 match ch {
655 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
656 | b']' | b'{' | b'}' | b'/' | b'%' => break,
657 b'#' => {
658 self.position += 1;
660 if self.position + 1 < self.input.len() {
661 self.position += 2;
662 }
663 }
664 _ => self.position += 1,
665 }
666 }
667
668 let name_bytes = &self.input[start..self.position];
669 let name = self.decode_name(name_bytes)?;
670 Ok(Some(Token::Name(name)))
671 }
672
673 fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
674 let mut result = Vec::new();
675 let mut i = 0;
676
677 while i < bytes.len() {
678 if bytes[i] == b'#' && i + 2 < bytes.len() {
679 let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
681 ParseError::SyntaxError {
682 position: self.position,
683 message: "Invalid hex escape in name".to_string(),
684 }
685 })?;
686 let value =
687 u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
688 position: self.position,
689 message: "Invalid hex escape in name".to_string(),
690 })?;
691 result.push(value);
692 i += 3;
693 } else {
694 result.push(bytes[i]);
695 i += 1;
696 }
697 }
698
699 String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
700 position: self.position,
701 message: "Invalid UTF-8 in name".to_string(),
702 })
703 }
704
705 fn read_operator(&mut self) -> ParseResult<Option<Token>> {
706 let start = self.position;
707
708 while self.position < self.input.len() {
709 let ch = self.input[self.position];
710 match ch {
711 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
712 | b']' | b'{' | b'}' | b'/' | b'%' | b';' => break,
713 _ => self.position += 1,
714 }
715 }
716
717 let op_bytes = &self.input[start..self.position];
718 let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
719 position: start,
720 message: "Invalid operator".to_string(),
721 })?;
722
723 Ok(Some(Token::Operator(op.to_string())))
724 }
725}
726
/// Turns a token sequence into a list of [`ContentOperation`]s.
pub struct ContentParser {
    /// All tokens of the content stream being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to process.
    position: usize,
}
749
750impl ContentParser {
751 pub fn new(_content: &[u8]) -> Self {
753 Self {
754 tokens: Vec::new(),
755 position: 0,
756 }
757 }
758
759 pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
793 Self::parse_content(content)
794 }
795
796 pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
801 let mut tokenizer = ContentTokenizer::new(content);
802 let mut tokens = Vec::new();
803
804 while let Some(token) = tokenizer.next_token()? {
806 tokens.push(token);
807 }
808
809 let mut parser = Self {
810 tokens,
811 position: 0,
812 };
813
814 parser.parse_operators()
815 }
816
817 fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
818 let mut operators = Vec::new();
819 let mut operand_stack: Vec<Token> = Vec::new();
820
821 while self.position < self.tokens.len() {
822 let token = self.tokens[self.position].clone();
823 self.position += 1;
824
825 match &token {
826 Token::Operator(op) => {
827 let operator = self.parse_operator(op, &mut operand_stack)?;
828 operators.push(operator);
829 }
830 _ => {
831 operand_stack.push(token);
833 }
834 }
835 }
836
837 Ok(operators)
838 }
839
    /// Converts the operator `op` and its pending `operands` into a
    /// [`ContentOperation`].
    ///
    /// Operands are taken from the end of `operands`, so each arm pops them in
    /// the reverse of the order they appear in the stream. `operands` is
    /// cleared before returning successfully, discarding any extras.
    ///
    /// # Errors
    /// Returns `ParseError::SyntaxError` for an operator string that is not
    /// listed below, and propagates errors from the `pop_*` helpers when an
    /// operand is missing or has the wrong token type.
    fn parse_operator(
        &mut self,
        op: &str,
        operands: &mut Vec<Token>,
    ) -> ParseResult<ContentOperation> {
        let operator = match op {
            // Text object delimiters.
            "BT" => ContentOperation::BeginText,
            "ET" => ContentOperation::EndText,

            // Text state.
            "Tc" => {
                let spacing = self.pop_number(operands)?;
                ContentOperation::SetCharSpacing(spacing)
            }
            "Tw" => {
                let spacing = self.pop_number(operands)?;
                ContentOperation::SetWordSpacing(spacing)
            }
            "Tz" => {
                let scale = self.pop_number(operands)?;
                ContentOperation::SetHorizontalScaling(scale)
            }
            "TL" => {
                let leading = self.pop_number(operands)?;
                ContentOperation::SetLeading(leading)
            }
            "Tf" => {
                let size = self.pop_number(operands)?;
                let font = self.pop_name(operands)?;
                ContentOperation::SetFont(font, size)
            }
            "Tr" => {
                // pop_integer accepts only an Integer token, not a real.
                let mode = self.pop_integer(operands)?;
                ContentOperation::SetTextRenderMode(mode)
            }
            "Ts" => {
                let rise = self.pop_number(operands)?;
                ContentOperation::SetTextRise(rise)
            }

            // Text positioning.
            "Td" => {
                let ty = self.pop_number(operands)?;
                let tx = self.pop_number(operands)?;
                ContentOperation::MoveText(tx, ty)
            }
            "TD" => {
                let ty = self.pop_number(operands)?;
                let tx = self.pop_number(operands)?;
                ContentOperation::MoveTextSetLeading(tx, ty)
            }
            "Tm" => {
                let f = self.pop_number(operands)?;
                let e = self.pop_number(operands)?;
                let d = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                let b = self.pop_number(operands)?;
                let a = self.pop_number(operands)?;
                ContentOperation::SetTextMatrix(a, b, c, d, e, f)
            }
            "T*" => ContentOperation::NextLine,

            // Text showing.
            "Tj" => {
                let text = self.pop_string(operands)?;
                ContentOperation::ShowText(text)
            }
            "TJ" => {
                let array = self.pop_array(operands)?;
                let elements = self.parse_text_array(array)?;
                ContentOperation::ShowTextArray(elements)
            }
            "'" => {
                let text = self.pop_string(operands)?;
                ContentOperation::NextLineShowText(text)
            }
            "\"" => {
                // `aw` receives the operand written second and `ac` the one
                // written first, so passing (ac, aw, text) stores the two
                // numbers in the order they appear in the stream.
                let text = self.pop_string(operands)?;
                let aw = self.pop_number(operands)?;
                let ac = self.pop_number(operands)?;
                ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
            }

            // Graphics state.
            "q" => ContentOperation::SaveGraphicsState,
            "Q" => ContentOperation::RestoreGraphicsState,
            "cm" => {
                let f = self.pop_number(operands)?;
                let e = self.pop_number(operands)?;
                let d = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                let b = self.pop_number(operands)?;
                let a = self.pop_number(operands)?;
                ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
            }
            "w" => {
                let width = self.pop_number(operands)?;
                ContentOperation::SetLineWidth(width)
            }
            "J" => {
                let cap = self.pop_integer(operands)?;
                ContentOperation::SetLineCap(cap)
            }
            "j" => {
                let join = self.pop_integer(operands)?;
                ContentOperation::SetLineJoin(join)
            }
            "M" => {
                let limit = self.pop_number(operands)?;
                ContentOperation::SetMiterLimit(limit)
            }
            "d" => {
                let phase = self.pop_number(operands)?;
                let array = self.pop_array(operands)?;
                let pattern = self.parse_dash_array(array)?;
                ContentOperation::SetDashPattern(pattern, phase)
            }
            "ri" => {
                let intent = self.pop_name(operands)?;
                ContentOperation::SetIntent(intent)
            }
            "i" => {
                let flatness = self.pop_number(operands)?;
                ContentOperation::SetFlatness(flatness)
            }
            "gs" => {
                let name = self.pop_name(operands)?;
                ContentOperation::SetGraphicsStateParams(name)
            }

            // Path construction.
            "m" => {
                let y = self.pop_number(operands)?;
                let x = self.pop_number(operands)?;
                ContentOperation::MoveTo(x, y)
            }
            "l" => {
                let y = self.pop_number(operands)?;
                let x = self.pop_number(operands)?;
                ContentOperation::LineTo(x, y)
            }
            "c" => {
                let y3 = self.pop_number(operands)?;
                let x3 = self.pop_number(operands)?;
                let y2 = self.pop_number(operands)?;
                let x2 = self.pop_number(operands)?;
                let y1 = self.pop_number(operands)?;
                let x1 = self.pop_number(operands)?;
                ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
            }
            "v" => {
                let y3 = self.pop_number(operands)?;
                let x3 = self.pop_number(operands)?;
                let y2 = self.pop_number(operands)?;
                let x2 = self.pop_number(operands)?;
                ContentOperation::CurveToV(x2, y2, x3, y3)
            }
            "y" => {
                let y3 = self.pop_number(operands)?;
                let x3 = self.pop_number(operands)?;
                let y1 = self.pop_number(operands)?;
                let x1 = self.pop_number(operands)?;
                ContentOperation::CurveToY(x1, y1, x3, y3)
            }
            "h" => ContentOperation::ClosePath,
            "re" => {
                let height = self.pop_number(operands)?;
                let width = self.pop_number(operands)?;
                let y = self.pop_number(operands)?;
                let x = self.pop_number(operands)?;
                ContentOperation::Rectangle(x, y, width, height)
            }

            // Path painting.
            "S" => ContentOperation::Stroke,
            "s" => ContentOperation::CloseStroke,
            "f" | "F" => ContentOperation::Fill,
            "f*" => ContentOperation::FillEvenOdd,
            "B" => ContentOperation::FillStroke,
            "B*" => ContentOperation::FillStrokeEvenOdd,
            "b" => ContentOperation::CloseFillStroke,
            "b*" => ContentOperation::CloseFillStrokeEvenOdd,
            "n" => ContentOperation::EndPath,

            // Clipping.
            "W" => ContentOperation::Clip,
            "W*" => ContentOperation::ClipEvenOdd,

            // Color.
            "CS" => {
                let name = self.pop_name(operands)?;
                ContentOperation::SetStrokingColorSpace(name)
            }
            "cs" => {
                let name = self.pop_name(operands)?;
                ContentOperation::SetNonStrokingColorSpace(name)
            }
            "SC" | "SCN" => {
                // Only the trailing numeric operands are kept; a non-numeric
                // operand stops the collection and is discarded by the final clear.
                let components = self.pop_color_components(operands)?;
                ContentOperation::SetStrokingColor(components)
            }
            "sc" | "scn" => {
                let components = self.pop_color_components(operands)?;
                ContentOperation::SetNonStrokingColor(components)
            }
            "G" => {
                let gray = self.pop_number(operands)?;
                ContentOperation::SetStrokingGray(gray)
            }
            "g" => {
                let gray = self.pop_number(operands)?;
                ContentOperation::SetNonStrokingGray(gray)
            }
            "RG" => {
                let b = self.pop_number(operands)?;
                let g = self.pop_number(operands)?;
                let r = self.pop_number(operands)?;
                ContentOperation::SetStrokingRGB(r, g, b)
            }
            "rg" => {
                let b = self.pop_number(operands)?;
                let g = self.pop_number(operands)?;
                let r = self.pop_number(operands)?;
                ContentOperation::SetNonStrokingRGB(r, g, b)
            }
            "K" => {
                let k = self.pop_number(operands)?;
                let y = self.pop_number(operands)?;
                let m = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                ContentOperation::SetStrokingCMYK(c, m, y, k)
            }
            "k" => {
                let k = self.pop_number(operands)?;
                let y = self.pop_number(operands)?;
                let m = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                ContentOperation::SetNonStrokingCMYK(c, m, y, k)
            }

            // Shading.
            "sh" => {
                let name = self.pop_name(operands)?;
                ContentOperation::ShadingFill(name)
            }

            // XObjects.
            "Do" => {
                let name = self.pop_name(operands)?;
                ContentOperation::PaintXObject(name)
            }

            // Marked content. The properties operand is written after the tag,
            // so it is popped first.
            "BMC" => {
                let tag = self.pop_name(operands)?;
                ContentOperation::BeginMarkedContent(tag)
            }
            "BDC" => {
                let props = self.pop_dict_or_name(operands)?;
                let tag = self.pop_name(operands)?;
                ContentOperation::BeginMarkedContentWithProps(tag, props)
            }
            "EMC" => ContentOperation::EndMarkedContent,
            "MP" => {
                let tag = self.pop_name(operands)?;
                ContentOperation::DefineMarkedContentPoint(tag)
            }
            "DP" => {
                let props = self.pop_dict_or_name(operands)?;
                let tag = self.pop_name(operands)?;
                ContentOperation::DefineMarkedContentPointWithProps(tag, props)
            }

            // Compatibility sections.
            "BX" => ContentOperation::BeginCompatibility,
            "EX" => ContentOperation::EndCompatibility,

            // Inline image: parse_inline_image reads further tokens from
            // self.tokens and advances self.position while doing so.
            "BI" => {
                operands.clear();
                self.parse_inline_image()?
            }

            _ => {
                return Err(ParseError::SyntaxError {
                    position: self.position,
                    message: format!("Unknown operator: {op}"),
                });
            }
        };

        // Drop any operands the operator did not consume.
        operands.clear();
        Ok(operator)
    }
1135
1136 fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1138 match operands.pop() {
1139 Some(Token::Number(n)) => Ok(n),
1140 Some(Token::Integer(i)) => Ok(i as f32),
1141 _ => Err(ParseError::SyntaxError {
1142 position: self.position,
1143 message: "Expected number operand".to_string(),
1144 }),
1145 }
1146 }
1147
1148 fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1149 match operands.pop() {
1150 Some(Token::Integer(i)) => Ok(i),
1151 _ => Err(ParseError::SyntaxError {
1152 position: self.position,
1153 message: "Expected integer operand".to_string(),
1154 }),
1155 }
1156 }
1157
1158 fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1159 match operands.pop() {
1160 Some(Token::Name(n)) => Ok(n),
1161 _ => Err(ParseError::SyntaxError {
1162 position: self.position,
1163 message: "Expected name operand".to_string(),
1164 }),
1165 }
1166 }
1167
1168 fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1169 match operands.pop() {
1170 Some(Token::String(s)) => Ok(s),
1171 Some(Token::HexString(s)) => Ok(s),
1172 _ => Err(ParseError::SyntaxError {
1173 position: self.position,
1174 message: "Expected string operand".to_string(),
1175 }),
1176 }
1177 }
1178
1179 fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1180 let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
1182 if has_array_end {
1183 operands.pop(); }
1185
1186 let mut array = Vec::new();
1187 let mut found_start = false;
1188
1189 while let Some(token) = operands.pop() {
1191 match token {
1192 Token::ArrayStart => {
1193 found_start = true;
1194 break;
1195 }
1196 Token::ArrayEnd => {
1197 continue;
1199 }
1200 _ => array.push(token),
1201 }
1202 }
1203
1204 if !found_start {
1205 return Err(ParseError::SyntaxError {
1206 position: self.position,
1207 message: "Expected array".to_string(),
1208 });
1209 }
1210
1211 array.reverse(); Ok(array)
1213 }
1214
1215 fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1216 if let Some(token) = operands.pop() {
1217 match token {
1218 Token::Name(name) => {
1219 let mut props = HashMap::new();
1222 props.insert("__resource_ref".to_string(), name);
1223 Ok(props)
1224 }
1225 Token::DictEnd => {
1226 let mut props = HashMap::new();
1230
1231 while let Some(value_token) = operands.pop() {
1233 if matches!(value_token, Token::DictStart) {
1234 break;
1235 }
1236
1237 let value = match &value_token {
1241 Token::Name(name) => name.clone(),
1242 Token::String(s) => String::from_utf8_lossy(s).to_string(),
1243 Token::Integer(i) => i.to_string(),
1244 Token::Number(f) => f.to_string(),
1245 Token::ArrayEnd => {
1246 let mut array_elements = Vec::new();
1248 while let Some(arr_token) = operands.pop() {
1249 match arr_token {
1250 Token::ArrayStart => break,
1251 Token::Name(n) => array_elements.push(n),
1252 Token::String(s) => array_elements
1253 .push(String::from_utf8_lossy(&s).to_string()),
1254 Token::Integer(i) => array_elements.push(i.to_string()),
1255 Token::Number(f) => array_elements.push(f.to_string()),
1256 _ => {} }
1258 }
1259 array_elements.reverse();
1260 format!("[{}]", array_elements.join(", "))
1261 }
1262 _ => continue, };
1264
1265 if let Some(Token::Name(key)) = operands.pop() {
1267 props.insert(key, value);
1268 }
1269 }
1270
1271 Ok(props)
1272 }
1273 _ => {
1274 Ok(HashMap::new())
1276 }
1277 }
1278 } else {
1279 Err(ParseError::SyntaxError {
1281 position: 0,
1282 message: "Expected dictionary or name for marked content properties".to_string(),
1283 })
1284 }
1285 }
1286
1287 fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1288 let mut components = Vec::new();
1289
1290 while let Some(token) = operands.last() {
1292 match token {
1293 Token::Number(n) => {
1294 components.push(*n);
1295 operands.pop();
1296 }
1297 Token::Integer(i) => {
1298 components.push(*i as f32);
1299 operands.pop();
1300 }
1301 _ => break,
1302 }
1303 }
1304
1305 components.reverse();
1306 Ok(components)
1307 }
1308
1309 fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1310 let mut elements = Vec::new();
1311
1312 for token in tokens {
1313 match token {
1314 Token::String(s) | Token::HexString(s) => {
1315 elements.push(TextElement::Text(s));
1316 }
1317 Token::Number(n) => {
1318 elements.push(TextElement::Spacing(n));
1319 }
1320 Token::Integer(i) => {
1321 elements.push(TextElement::Spacing(i as f32));
1322 }
1323 _ => {
1324 return Err(ParseError::SyntaxError {
1325 position: self.position,
1326 message: "Invalid element in text array".to_string(),
1327 });
1328 }
1329 }
1330 }
1331
1332 Ok(elements)
1333 }
1334
1335 fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1336 let mut pattern = Vec::new();
1337
1338 for token in tokens {
1339 match token {
1340 Token::Number(n) => pattern.push(n),
1341 Token::Integer(i) => pattern.push(i as f32),
1342 _ => {
1343 return Err(ParseError::SyntaxError {
1344 position: self.position,
1345 message: "Invalid element in dash array".to_string(),
1346 });
1347 }
1348 }
1349 }
1350
1351 Ok(pattern)
1352 }
1353
    /// Parses the tokens that follow a `BI` operator into an
    /// [`ContentOperation::InlineImage`].
    ///
    /// Phase 1 reads `Name value` pairs until an `ID` operator token; phase 2
    /// collects data until an `EI` operator token. `self.position` is left just
    /// past `EI`, or at the end of the tokens if `ID`/`EI` never appears.
    ///
    /// NOTE(review): the data is rebuilt from tokens rather than read from the
    /// raw bytes, so whitespace between tokens is not part of `data` —
    /// presumably lossy for binary image data; confirm against callers.
    fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
        let mut params = HashMap::new();

        // Phase 1: dictionary entries up to the `ID` operator.
        while self.position < self.tokens.len() {
            if let Token::Operator(op) = &self.tokens[self.position] {
                if op == "ID" {
                    self.position += 1;
                    break;
                }
            }

            if let Token::Name(key) = &self.tokens[self.position] {
                // Move to the value token; a key with nothing after it ends the phase.
                self.position += 1;
                if self.position >= self.tokens.len() {
                    break;
                }

                // Value types not listed here (e.g. an array start) are stored as Null.
                let value = match &self.tokens[self.position] {
                    Token::Integer(n) => Object::Integer(*n as i64),
                    Token::Number(n) => Object::Real(*n as f64),
                    Token::Name(s) => Object::Name(expand_inline_name(s)),
                    Token::String(s) => Object::String(String::from_utf8_lossy(s).to_string()),
                    Token::HexString(s) => Object::String(String::from_utf8_lossy(s).to_string()),
                    _ => Object::Null,
                };

                // Abbreviated keys are stored under their full names.
                let full_key = expand_inline_key(key);
                params.insert(full_key, value);
                self.position += 1;
            } else {
                // Not a key: skip the token.
                self.position += 1;
            }
        }

        let mut data = Vec::new();

        // Phase 2: everything up to the `EI` operator becomes image data.
        while self.position < self.tokens.len() {
            if let Token::Operator(op) = &self.tokens[self.position] {
                if op == "EI" {
                    self.position += 1;
                    break;
                }
            }

            // Append each token's bytes (numbers in their decimal text form);
            // bracket and dictionary tokens contribute nothing.
            match &self.tokens[self.position] {
                Token::String(bytes) => data.extend_from_slice(bytes),
                Token::HexString(bytes) => data.extend_from_slice(bytes),
                Token::Integer(n) => data.extend_from_slice(n.to_string().as_bytes()),
                Token::Number(n) => data.extend_from_slice(n.to_string().as_bytes()),
                Token::Name(s) => data.extend_from_slice(s.as_bytes()),
                Token::Operator(s) if s != "EI" => data.extend_from_slice(s.as_bytes()),
                _ => {}
            }
            self.position += 1;
        }

        Ok(ContentOperation::InlineImage { params, data })
    }
1428}
1429
/// Maps an abbreviated inline-image dictionary key to its full name.
///
/// Keys that are not in the table, including keys already in full form, are
/// returned unchanged.
fn expand_inline_key(key: &str) -> String {
    let full = match key {
        "W" => "Width",
        "H" => "Height",
        "CS" => "ColorSpace",
        "BPC" => "BitsPerComponent",
        "F" => "Filter",
        "DP" => "DecodeParms",
        "IM" => "ImageMask",
        "I" => "Interpolate",
        "D" => "Decode",
        other => other,
    };
    full.to_owned()
}
1446
/// Maps an abbreviated inline-image name value (color space or filter) to its
/// full name. Names that are not in the table are returned unchanged.
fn expand_inline_name(name: &str) -> String {
    let full = match name {
        "G" => "DeviceGray",
        "RGB" => "DeviceRGB",
        "CMYK" => "DeviceCMYK",
        "I" => "Indexed",
        "AHx" => "ASCIIHexDecode",
        "A85" => "ASCII85Decode",
        "LZW" => "LZWDecode",
        "Fl" => "FlateDecode",
        "RL" => "RunLengthDecode",
        "DCT" => "DCTDecode",
        "CCF" => "CCITTFaxDecode",
        other => other,
    };
    full.to_owned()
}
1464
1465#[cfg(test)]
1466mod tests {
1467 use super::*;
1468
1469 #[test]
1470 fn test_tokenize_numbers() {
1471 let input = b"123 -45 3.14159 -0.5 .5";
1472 let mut tokenizer = ContentTokenizer::new(input);
1473
1474 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(123)));
1475 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(-45)));
1476 assert_eq!(
1477 tokenizer.next_token().unwrap(),
1478 Some(Token::Number(3.14159))
1479 );
1480 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1481 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1482 assert_eq!(tokenizer.next_token().unwrap(), None);
1483 }
1484
1485 #[test]
1486 fn test_tokenize_strings() {
1487 let input = b"(Hello World) (Hello\\nWorld) (Nested (paren))";
1488 let mut tokenizer = ContentTokenizer::new(input);
1489
1490 assert_eq!(
1491 tokenizer.next_token().unwrap(),
1492 Some(Token::String(b"Hello World".to_vec()))
1493 );
1494 assert_eq!(
1495 tokenizer.next_token().unwrap(),
1496 Some(Token::String(b"Hello\nWorld".to_vec()))
1497 );
1498 assert_eq!(
1499 tokenizer.next_token().unwrap(),
1500 Some(Token::String(b"Nested (paren)".to_vec()))
1501 );
1502 }
1503
1504 #[test]
1505 fn test_tokenize_hex_strings() {
1506 let input = b"<48656C6C6F> <48 65 6C 6C 6F>";
1507 let mut tokenizer = ContentTokenizer::new(input);
1508
1509 assert_eq!(
1510 tokenizer.next_token().unwrap(),
1511 Some(Token::HexString(b"Hello".to_vec()))
1512 );
1513 assert_eq!(
1514 tokenizer.next_token().unwrap(),
1515 Some(Token::HexString(b"Hello".to_vec()))
1516 );
1517 }
1518
1519 #[test]
1520 fn test_tokenize_names() {
1521 let input = b"/Name /Name#20with#20spaces /A#42C";
1522 let mut tokenizer = ContentTokenizer::new(input);
1523
1524 assert_eq!(
1525 tokenizer.next_token().unwrap(),
1526 Some(Token::Name("Name".to_string()))
1527 );
1528 assert_eq!(
1529 tokenizer.next_token().unwrap(),
1530 Some(Token::Name("Name with spaces".to_string()))
1531 );
1532 assert_eq!(
1533 tokenizer.next_token().unwrap(),
1534 Some(Token::Name("ABC".to_string()))
1535 );
1536 }
1537
1538 #[test]
1539 fn test_tokenize_operators() {
1540 let input = b"BT Tj ET q Q";
1541 let mut tokenizer = ContentTokenizer::new(input);
1542
1543 assert_eq!(
1544 tokenizer.next_token().unwrap(),
1545 Some(Token::Operator("BT".to_string()))
1546 );
1547 assert_eq!(
1548 tokenizer.next_token().unwrap(),
1549 Some(Token::Operator("Tj".to_string()))
1550 );
1551 assert_eq!(
1552 tokenizer.next_token().unwrap(),
1553 Some(Token::Operator("ET".to_string()))
1554 );
1555 assert_eq!(
1556 tokenizer.next_token().unwrap(),
1557 Some(Token::Operator("q".to_string()))
1558 );
1559 assert_eq!(
1560 tokenizer.next_token().unwrap(),
1561 Some(Token::Operator("Q".to_string()))
1562 );
1563 }
1564
1565 #[test]
1566 fn test_parse_text_operators() {
1567 let content = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
1568 let operators = ContentParser::parse(content).unwrap();
1569
1570 assert_eq!(operators.len(), 5);
1571 assert_eq!(operators[0], ContentOperation::BeginText);
1572 assert_eq!(
1573 operators[1],
1574 ContentOperation::SetFont("F1".to_string(), 12.0)
1575 );
1576 assert_eq!(operators[2], ContentOperation::MoveText(100.0, 200.0));
1577 assert_eq!(
1578 operators[3],
1579 ContentOperation::ShowText(b"Hello World".to_vec())
1580 );
1581 assert_eq!(operators[4], ContentOperation::EndText);
1582 }
1583
/// A save / transform / stroke-rectangle / restore sequence parses into six operations.
#[test]
fn test_parse_graphics_operators() {
    let operators = ContentParser::parse(b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q").unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
        ContentOperation::SetLineWidth(2.0),
        ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0),
        ContentOperation::Stroke,
        ContentOperation::RestoreGraphicsState,
    ];

    assert_eq!(operators.len(), 6);
    for (actual, wanted) in operators.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
1603
/// The non-stroking gray (`g`), RGB (`rg`) and CMYK (`k`) operators each yield one operation.
#[test]
fn test_parse_color_operators() {
    let operators = ContentParser::parse(b"0.5 g 1 0 0 rg 0 0 0 1 k").unwrap();

    let expected = [
        ContentOperation::SetNonStrokingGray(0.5),
        ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0),
    ];

    assert_eq!(operators.len(), 3);
    for (actual, wanted) in operators.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
1620
1621 mod comprehensive_tests {
1623 use super::*;
1624
/// Walks through the text-state, text-positioning and text-showing operators inside
/// a single `BT ... ET` object and checks each resulting operation by position.
#[test]
fn test_all_text_operators() {
    let operators = ContentParser::parse(
        b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET",
    )
    .unwrap();

    let expected = [
        ContentOperation::BeginText,
        ContentOperation::SetCharSpacing(5.0),
        ContentOperation::SetWordSpacing(10.0),
        ContentOperation::SetHorizontalScaling(120.0),
        ContentOperation::SetLeading(15.0),
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::SetTextRenderMode(1),
        ContentOperation::SetTextRise(5.0),
        ContentOperation::MoveText(100.0, 200.0),
        ContentOperation::MoveTextSetLeading(50.0, 150.0),
        ContentOperation::NextLine,
        ContentOperation::ShowText(b"Hello".to_vec()),
        ContentOperation::EndText,
    ];

    // Checked by index so only the leading operations are constrained.
    for (idx, wanted) in expected.iter().enumerate() {
        assert_eq!(&operators[idx], wanted);
    }
}
1651
/// Covers the graphics-state operators `q Q cm w J j M gs i ri` in one stream.
#[test]
fn test_all_graphics_state_operators() {
    let operators = ContentParser::parse(
        b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri",
    )
    .unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
        ContentOperation::SetLineWidth(2.0),
        ContentOperation::SetLineCap(1),
        ContentOperation::SetLineJoin(2),
        ContentOperation::SetMiterLimit(10.0),
        ContentOperation::SetGraphicsStateParams("GS1".to_string()),
        ContentOperation::SetFlatness(0.5),
        ContentOperation::SetIntent("Perceptual".to_string()),
    ];

    for (idx, wanted) in expected.iter().enumerate() {
        assert_eq!(&operators[idx], wanted);
    }
}
1678
/// Covers the path-construction operators `m l c v y h re` in one stream.
#[test]
fn test_all_path_construction_operators() {
    let operators = ContentParser::parse(
        b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re",
    )
    .unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0),
        ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0),
        ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0),
        ContentOperation::ClosePath,
        ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0),
    ];

    for (idx, wanted) in expected.iter().enumerate() {
        assert_eq!(&operators[idx], wanted);
    }
}
1704
/// Covers the path-painting and clipping operators; `f` and `F` are both
/// expected to produce `Fill`.
#[test]
fn test_all_path_painting_operators() {
    let operators = ContentParser::parse(b"S s f F f* B B* b b* n W W*").unwrap();

    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
        ContentOperation::EndPath,
        ContentOperation::Clip,
        ContentOperation::ClipEvenOdd,
    ];

    for (idx, wanted) in expected.iter().enumerate() {
        assert_eq!(&operators[idx], wanted);
    }
}
1723
/// Covers the colour-space, gray, RGB, CMYK and shading operators for both the
/// stroking (upper-case) and non-stroking (lower-case) variants.
#[test]
fn test_all_color_operators() {
    let operators = ContentParser::parse(
        b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh",
    )
    .unwrap();

    let expected = [
        ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string()),
        ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string()),
        ContentOperation::SetStrokingGray(0.7),
        ContentOperation::SetNonStrokingGray(0.4),
        ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0),
        ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0),
        ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5),
        ContentOperation::ShadingFill("Shade1".to_string()),
    ];

    for (idx, wanted) in expected.iter().enumerate() {
        assert_eq!(&operators[idx], wanted);
    }
}
1761
/// Covers `Do`, the marked-content operators `BMC EMC MP`, and the
/// compatibility brackets `BX EX`.
#[test]
fn test_xobject_and_marked_content_operators() {
    let operators = ContentParser::parse(b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX").unwrap();

    let expected = [
        ContentOperation::PaintXObject("Image1".to_string()),
        ContentOperation::BeginMarkedContent("MC1".to_string()),
        ContentOperation::EndMarkedContent,
        ContentOperation::DefineMarkedContentPoint("MP1".to_string()),
        ContentOperation::BeginCompatibility,
        ContentOperation::EndCompatibility,
    ];

    for (idx, wanted) in expected.iter().enumerate() {
        assert_eq!(&operators[idx], wanted);
    }
}
1784
/// A text object nested inside a saved, transformed graphics state parses into
/// eight operations in stream order.
#[test]
fn test_complex_content_stream() {
    let operators =
        ContentParser::parse(b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q")
            .unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0),
        ContentOperation::BeginText,
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::MoveText(0.0, 0.0),
        ContentOperation::ShowText(b"Complex".to_vec()),
        ContentOperation::EndText,
        ContentOperation::RestoreGraphicsState,
    ];

    assert_eq!(operators.len(), 8);
    for (actual, wanted) in operators.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
1809
/// Spaces, tabs, CR and LF between tokens are skipped, and the tokenizer
/// reports `None` once only trailing whitespace is left.
#[test]
fn test_tokenizer_whitespace_handling() {
    let mut tokenizer = ContentTokenizer::new(b" \t\n\r BT \t\n /F1 12.5 \t Tf \n\r ET ");

    let expected = vec![
        Token::Operator("BT".to_string()),
        Token::Name("F1".to_string()),
        Token::Number(12.5),
        Token::Operator("Tf".to_string()),
        Token::Operator("ET".to_string()),
    ];
    for wanted in expected {
        assert_eq!(tokenizer.next_token().unwrap(), Some(wanted));
    }

    // End of input.
    assert_eq!(tokenizer.next_token().unwrap(), None);
}
1834
/// Numeric spellings with a missing integer part, a missing fraction, or an
/// explicit sign are all accepted.
#[test]
fn test_tokenizer_edge_cases() {
    let mut tokenizer = ContentTokenizer::new(b"0 .5 -.5 +.5 123. .123 1.23 -1.23");

    let expected = vec![
        Token::Integer(0),
        Token::Number(0.5),
        Token::Number(-0.5),
        Token::Number(0.5),
        Token::Number(123.0),
        Token::Number(0.123),
        Token::Number(1.23),
        Token::Number(-1.23),
    ];
    for wanted in expected {
        assert_eq!(tokenizer.next_token().unwrap(), Some(wanted));
    }
}
1850
/// Literal-string escape handling: backslash, parentheses, and the
/// `\n \t \r \b \f` control escapes.
///
/// Only the first ten strings of the input are checked; the trailing two
/// (`\377` and `\ddd`) are present in the input but not asserted on.
#[test]
fn test_string_parsing_edge_cases() {
    let input = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
    let mut tokenizer = ContentTokenizer::new(input);

    // Decoded payloads, in input order. The escapes below are Rust escapes, i.e.
    // the actual control bytes the tokenizer is expected to emit.
    let expected: [&[u8]; 10] = [
        b"Simple",
        b"With\\backslash",
        b"With)paren",
        b"With\newline",
        b"With\ttab",
        b"With\rcarriage",
        b"With\x08backspace",
        b"With\x0Cformfeed",
        b"With(leftparen",
        b"With)rightparen",
    ];

    for payload in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(payload.to_vec()))
        );
    }
}
1897
/// Hex strings: plain, with embedded spaces, with an extra byte, and with an
/// odd digit count (the final `5` is expected to decode as `0x50`).
#[test]
fn test_hex_string_parsing() {
    let mut tokenizer =
        ContentTokenizer::new(b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>");

    let expected: [&[u8]; 4] = [b"Hello", b"Hello", b"HelloW", b"Hello\x50"];

    for payload in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::HexString(payload.to_vec()))
        );
    }
}
1920
/// `#xx` hex escapes inside names are decoded, including an escape in the
/// first position of the name.
#[test]
fn test_name_parsing_edge_cases() {
    let mut tokenizer = ContentTokenizer::new(
        b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName",
    );

    let expected = [
        "Name",
        "Name with spaces",
        "Name#with#hash",
        "Name/with/slash",
        "EmptyName",
    ];

    for decoded in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Name(decoded.to_string()))
        );
    }
}
1947
/// Repeated and nested `q`/`Q` and `BT`/`ET` operators are passed through
/// one-to-one.
#[test]
fn test_operator_parsing_edge_cases() {
    let operators = ContentParser::parse(b"q q q Q Q Q BT BT ET ET").unwrap();

    assert_eq!(operators.len(), 10);
    for op in &operators[0..3] {
        assert_eq!(*op, ContentOperation::SaveGraphicsState);
    }
    for op in &operators[3..6] {
        assert_eq!(*op, ContentOperation::RestoreGraphicsState);
    }
    for op in &operators[6..8] {
        assert_eq!(*op, ContentOperation::BeginText);
    }
    for op in &operators[8..10] {
        assert_eq!(*op, ContentOperation::EndText);
    }
}
1965
/// `Td` preceded by a single number must be rejected.
#[test]
fn test_error_handling_insufficient_operands() {
    assert!(ContentParser::parse(b"100 Td").is_err());
}
1972
/// An unrecognised operator keyword makes parsing fail.
#[test]
fn test_error_handling_invalid_operator() {
    assert!(ContentParser::parse(b"100 200 INVALID").is_err());
}
1979
/// An unterminated literal string must not make the tokenizer panic.
///
/// Both `Ok` and `Err` are accepted here, so no assertion is made on the
/// result; the previous `is_ok() || is_err()` check was always true. Reaching
/// the end of the test is the only thing verified.
#[test]
fn test_error_handling_malformed_string() {
    let mut tokenizer = ContentTokenizer::new(b"(Unclosed string");

    // Result deliberately ignored: only "returns instead of panicking" matters.
    let _ = tokenizer.next_token();
}
1990
/// A non-hex digit (`G`) inside a hex string is a tokenizer error.
#[test]
fn test_error_handling_malformed_hex_string() {
    let mut tokenizer = ContentTokenizer::new(b"<48656C6C6G>");
    assert!(tokenizer.next_token().is_err());
}
1998
/// A `#` escape followed by non-hex characters in a name is a tokenizer error.
#[test]
fn test_error_handling_malformed_name() {
    let mut tokenizer = ContentTokenizer::new(b"/Name#GG");
    assert!(tokenizer.next_token().is_err());
}
2006
/// An empty stream parses successfully into zero operations.
#[test]
fn test_empty_content_stream() {
    let operators = ContentParser::parse(b"").unwrap();
    assert!(operators.is_empty());
}
2013
/// A stream containing nothing but whitespace parses into zero operations.
#[test]
fn test_whitespace_only_content_stream() {
    let operators = ContentParser::parse(b" \t\n\r ").unwrap();
    assert!(operators.is_empty());
}
2020
/// Integer-spelled operands are delivered to `m` / `l` as floating-point values.
#[test]
fn test_mixed_integer_and_real_operands() {
    let operators = ContentParser::parse(b"100 200 m 150 200 l").unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
    ];

    assert_eq!(operators.len(), 2);
    for (actual, wanted) in operators.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
2031
/// Negative integer and real operands keep their sign.
#[test]
fn test_negative_operands() {
    let operators = ContentParser::parse(b"-100 -200 Td -50.5 -75.2 TD").unwrap();

    let expected = [
        ContentOperation::MoveText(-100.0, -200.0),
        ContentOperation::MoveTextSetLeading(-50.5, -75.2),
    ];

    assert_eq!(operators.len(), 2);
    for (actual, wanted) in operators.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
2044
/// Operands with many digits are parsed to the same `f32` as the equivalent
/// Rust literal.
#[test]
fn test_large_numbers() {
    let operators = ContentParser::parse(b"999999.999999 -999999.999999 m").unwrap();
    let wanted = ContentOperation::MoveTo(999999.999999, -999999.999999);

    assert_eq!(operators.len(), 1);
    assert_eq!(operators[0], wanted);
}
2056
/// Plain decimal operands for `m`.
///
/// NOTE(review): despite the name, the input contains no exponent notation;
/// only ordinary decimals are exercised.
#[test]
fn test_scientific_notation() {
    let operators = ContentParser::parse(b"123.45 -456.78 m").unwrap();
    let wanted = ContentOperation::MoveTo(123.45, -456.78);

    assert_eq!(operators.len(), 1);
    assert_eq!(operators[0], wanted);
}
2066
/// `TJ` given a bare string instead of an array is rejected.
#[test]
fn test_show_text_array_complex() {
    assert!(ContentParser::parse(b"(Hello) TJ").is_err());
}
2075
/// `d` with only a phase operand and no dash array is rejected.
#[test]
fn test_dash_pattern_empty() {
    assert!(ContentParser::parse(b"0 d").is_err());
}
2084
/// `d` with a single real operand and no dash array is rejected.
#[test]
fn test_dash_pattern_complex() {
    assert!(ContentParser::parse(b"2.5 d").is_err());
}
2093
/// `pop_array` drains a whole array from the operand stack, whether or not a
/// trailing `ArrayEnd` token is present, and returns only the elements.
#[test]
fn test_pop_array_removes_array_end() {
    let parser = ContentParser::new(b"");

    // Array with an explicit ArrayEnd on the stack.
    {
        let mut stack = vec![
            Token::ArrayStart,
            Token::Integer(1),
            Token::Integer(2),
            Token::Integer(3),
            Token::ArrayEnd,
        ];
        let elements = parser.pop_array(&mut stack).unwrap();
        assert_eq!(elements.len(), 3);
        assert!(stack.is_empty());
    }

    // Array whose ArrayEnd is not on the stack.
    {
        let mut stack = vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)];
        let elements = parser.pop_array(&mut stack).unwrap();
        assert_eq!(elements.len(), 2);
        assert!(stack.is_empty());
    }
}
2117
/// `parse_dash_array` converts a mix of real and integer tokens to `f32`
/// values and accepts an empty token list.
#[test]
fn test_dash_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    let mixed = parser
        .parse_dash_array(vec![Token::Number(3.0), Token::Integer(2)])
        .unwrap();
    assert_eq!(mixed, vec![3.0, 2.0]);

    let none = parser.parse_dash_array(vec![]).unwrap();
    assert_eq!(none, Vec::<f32>::new());
}
2134
/// `parse_text_array` returns one element per string or number token.
#[test]
fn test_text_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    let tokens = vec![
        Token::String(b"Hello".to_vec()),
        Token::Number(-100.0),
        Token::String(b"World".to_vec()),
    ];

    let elements = parser.parse_text_array(tokens).unwrap();
    assert_eq!(elements.len(), 3);
}
2149
/// A `BI ... ID ... EI` sequence becomes a single `InlineImage` whose
/// abbreviated keys and colour-space name are expanded to their full forms.
#[test]
fn test_inline_image_handling() {
    let operators =
        ContentParser::parse(b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI").unwrap();
    assert_eq!(operators.len(), 1);

    let params = match &operators[0] {
        ContentOperation::InlineImage { params, .. } => params,
        _ => panic!("Expected InlineImage operation"),
    };

    assert_eq!(params.get("Width"), Some(&Object::Integer(100)));
    assert_eq!(params.get("Height"), Some(&Object::Integer(100)));
    assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(8)));
    assert_eq!(
        params.get("ColorSpace"),
        Some(&Object::Name("DeviceRGB".to_string()))
    );
}
2171
/// Inline image with a filter: the `/F /AHx` abbreviation is expanded to
/// `Filter` / `ASCIIHexDecode`, and `/G` to `DeviceGray`.
#[test]
fn test_inline_image_with_filter() {
    let operators =
        ContentParser::parse(b"BI /W 50 /H 50 /CS /G /BPC 1 /F /AHx ID 00FF00FF EI").unwrap();
    assert_eq!(operators.len(), 1);

    let params = match &operators[0] {
        ContentOperation::InlineImage { params, .. } => params,
        _ => panic!("Expected InlineImage operation"),
    };

    assert_eq!(params.get("Width"), Some(&Object::Integer(50)));
    assert_eq!(params.get("Height"), Some(&Object::Integer(50)));
    assert_eq!(
        params.get("ColorSpace"),
        Some(&Object::Name("DeviceGray".to_string()))
    );
    assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(1)));
    assert_eq!(
        params.get("Filter"),
        Some(&Object::Name("ASCIIHexDecode".to_string()))
    );
}
2195
/// Parses 1000 generated `m` operations and checks both the count and a
/// wall-clock budget.
///
/// NOTE(review): the 100 ms limit is measured with `Instant`, so the result
/// depends on machine load and build profile.
#[test]
fn test_content_parser_performance() {
    let content: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i + 1).into_bytes())
        .collect();

    // Only the parse itself is timed, not the input generation.
    let timer = std::time::Instant::now();
    let operators = ContentParser::parse(&content).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(operators.len(), 1000);
    assert!(elapsed.as_millis() < 100);
}
2210
/// Tokenizes 2000 generated integers and checks both the count and a
/// wall-clock budget.
///
/// NOTE(review): the 50 ms limit is measured with `Instant`, so the result
/// depends on machine load and build profile.
#[test]
fn test_tokenizer_performance() {
    let input: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} ", i, i + 1).into_bytes())
        .collect();

    // Timing covers tokenizer construction as well as draining it.
    let timer = std::time::Instant::now();
    let mut tokenizer = ContentTokenizer::new(&input);
    let count = std::iter::from_fn(|| tokenizer.next_token().unwrap()).count();
    let elapsed = timer.elapsed();

    assert_eq!(count, 2000);
    assert!(elapsed.as_millis() < 50);
}
2229
/// Parses a stream of 10 000 `c` operations and checks that every one of them
/// comes back as a `CurveTo`.
///
/// The per-operation check previously evaluated `matches!` and discarded the
/// result, so it verified nothing; the result is now asserted.
#[test]
fn test_memory_usage_large_content() {
    let mut content = Vec::new();
    for i in 0..10000 {
        content.extend_from_slice(
            format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5).as_bytes(),
        );
    }

    let operators = ContentParser::parse(&content).unwrap();
    assert_eq!(operators.len(), 10000);

    // Every generated operation uses the `c` operator.
    assert!(operators
        .iter()
        .all(|op| matches!(op, ContentOperation::CurveTo(_, _, _, _, _, _))));
}
2248
/// Ten threads parse the same shared buffer at once and must each get the
/// same five operations.
#[test]
fn test_concurrent_parsing() {
    use std::sync::Arc;
    use std::thread;

    let shared = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());

    let mut workers = Vec::with_capacity(10);
    for _ in 0..10 {
        let bytes = Arc::clone(&shared);
        workers.push(thread::spawn(move || ContentParser::parse(&bytes).unwrap()));
    }

    for worker in workers {
        let operators = worker.join().unwrap();
        assert_eq!(operators.len(), 5);
        assert_eq!(operators[0], ContentOperation::BeginText);
        assert_eq!(operators[4], ContentOperation::EndText);
    }
}
2269
/// Hex-string corner cases: empty `<>`, an odd number of digits (the last
/// nibble is expected to be padded with 0), and embedded whitespace.
#[test]
fn test_tokenizer_hex_string_edge_cases() {
    // Tokenizes `src` and returns the hex payload, or panics with `failure`
    // if the first token is not a hex string.
    let hex_payload = |src: &[u8], failure: &str| -> Vec<u8> {
        match ContentTokenizer::new(src).next_token().unwrap().unwrap() {
            Token::HexString(bytes) => bytes,
            _ => panic!("{}", failure),
        }
    };

    let empty = hex_payload(b"<>", "Expected empty hex string");
    assert!(empty.is_empty());

    let odd = hex_payload(b"<123>", "Expected hex string with odd digits");
    assert_eq!(odd, vec![0x12, 0x30]);

    let spaced = hex_payload(b"<12 34\t56\n78>", "Expected hex string with whitespace");
    assert_eq!(spaced, vec![0x12, 0x34, 0x56, 0x78]);
}
2297
/// Literal-string escapes: the single-character escapes and three-digit
/// octal escapes.
#[test]
fn test_tokenizer_literal_string_escape_sequences() {
    // Tokenizes `src` and returns the string payload, or panics with `failure`
    // if the first token is not a literal string.
    let string_payload = |src: &[u8], failure: &str| -> Vec<u8> {
        match ContentTokenizer::new(src).next_token().unwrap().unwrap() {
            Token::String(bytes) => bytes,
            _ => panic!("{}", failure),
        }
    };

    let simple = string_payload(b"(\\n\\r\\t\\b\\f\\(\\)\\\\)", "Expected string with escapes");
    assert_eq!(
        simple,
        vec![b'\n', b'\r', b'\t', 0x08, 0x0C, b'(', b')', b'\\']
    );

    // \101 = 'A', \040 = ' ', \377 = 255
    let octal = string_payload(b"(\\101\\040\\377)", "Expected string with octal escapes");
    assert_eq!(octal, vec![b'A', b' ', 255]);
}
2321
/// Balanced, unescaped parentheses inside a literal string are kept as part of
/// the string, at one and at several nesting levels.
#[test]
fn test_tokenizer_nested_parentheses() {
    // Tokenizes `src` and returns the string payload, or panics with `failure`
    // if the first token is not a literal string.
    let string_payload = |src: &[u8], failure: &str| -> Vec<u8> {
        match ContentTokenizer::new(src).next_token().unwrap().unwrap() {
            Token::String(bytes) => bytes,
            _ => panic!("{}", failure),
        }
    };

    let single = string_payload(
        b"(outer (inner) text)",
        "Expected string with nested parentheses",
    );
    assert_eq!(single, b"outer (inner) text");

    let deep = string_payload(
        b"(level1 (level2 (level3) back2) back1)",
        "Expected string with deep nesting",
    );
    assert_eq!(deep, b"level1 (level2 (level3) back2) back1");
}
2343
/// `#xx` escapes in names decode to spaces and to delimiter characters.
#[test]
fn test_tokenizer_name_hex_escapes() {
    // Tokenizes `src` and returns the decoded name, or panics with `failure`
    // if the first token is not a name.
    let decoded_name = |src: &[u8], failure: &str| -> String {
        match ContentTokenizer::new(src).next_token().unwrap().unwrap() {
            Token::Name(text) => text,
            _ => panic!("{}", failure),
        }
    };

    let spaced = decoded_name(b"/Name#20With#20Spaces", "Expected name with hex escapes");
    assert_eq!(spaced, "Name With Spaces");

    let special = decoded_name(
        b"/Special#2F#28#29#3C#3E",
        "Expected name with special character escapes",
    );
    assert_eq!(special, "Special/()<>");
}
2361
/// Numeric corner cases: a large integer, a very small real, and a real with
/// no leading digit.
#[test]
fn test_tokenizer_number_edge_cases() {
    let big = ContentTokenizer::new(b"2147483647")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::Integer(value) = big {
        assert_eq!(value, 2147483647);
    } else {
        panic!("Expected large integer");
    }

    let tiny = ContentTokenizer::new(b"0.00001")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::Number(value) = tiny {
        assert!((value - 0.00001).abs() < f32::EPSILON);
    } else {
        panic!("Expected small float");
    }

    let dotted = ContentTokenizer::new(b".5").next_token().unwrap().unwrap();
    if let Token::Number(value) = dotted {
        assert!((value - 0.5).abs() < f32::EPSILON);
    } else {
        panic!("Expected float starting with dot");
    }
}
2388
/// A closed, filled four-point path parses into six operations.
#[test]
fn test_parser_complex_path_operations() {
    let operators = ContentParser::parse(b"100 200 m 150 200 l 150 250 l 100 250 l h f").unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
        ContentOperation::LineTo(100.0, 250.0),
        ContentOperation::ClosePath,
        ContentOperation::Fill,
    ];

    assert_eq!(operators.len(), 6);
    for (actual, wanted) in operators.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
2402
/// A single `c` operator yields one `CurveTo` carrying the six operands in
/// stream order.
///
/// The operands are small integers, exactly representable as `f32`, so the
/// control points are compared for exact equality. The previous finiteness
/// and range checks would have accepted wrong or reordered values.
#[test]
fn test_parser_bezier_curves() {
    let content = b"100 100 150 50 200 150 c";
    let operators = ContentParser::parse(content).unwrap();

    assert_eq!(operators.len(), 1);
    match &operators[0] {
        ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3) => {
            assert_eq!(
                (*x1, *y1, *x2, *y2, *x3, *y3),
                (100.0, 100.0, 150.0, 50.0, 200.0, 150.0)
            );
        }
        _ => panic!("Expected CurveTo operation"),
    }
}
2424
/// Gray, RGB, CMYK, colour-space and generic colour operators in one stream.
///
/// All five operations are now checked; previously only the first two were
/// inspected even though the count was asserted to be five.
#[test]
fn test_parser_color_operations() {
    let content = b"0.5 g 1 0 0 rg 0 1 0 1 k /DeviceRGB cs 0.2 0.4 0.6 sc";
    let operators = ContentParser::parse(content).unwrap();

    assert_eq!(operators.len(), 5);
    match &operators[0] {
        ContentOperation::SetNonStrokingGray(gray) => assert_eq!(*gray, 0.5),
        _ => panic!("Expected SetNonStrokingGray"),
    }
    match &operators[1] {
        ContentOperation::SetNonStrokingRGB(r, g, b) => {
            assert_eq!((*r, *g, *b), (1.0, 0.0, 0.0));
        }
        _ => panic!("Expected SetNonStrokingRGB"),
    }
    assert_eq!(
        operators[2],
        ContentOperation::SetNonStrokingCMYK(0.0, 1.0, 0.0, 1.0)
    );
    assert_eq!(
        operators[3],
        ContentOperation::SetNonStrokingColorSpace("DeviceRGB".to_string())
    );
    // Only the variant is pinned for `sc`; the component values are not asserted.
    assert!(matches!(
        operators[4],
        ContentOperation::SetNonStrokingColor(_)
    ));
}
2442
/// Text matrix, two leading settings and two `'` operators inside one text
/// object give seven operations in total.
#[test]
fn test_parser_text_positioning_advanced() {
    let operators =
        ContentParser::parse(b"BT 1 0 0 1 100 200 Tm 0 TL 10 TL (Line 1) ' (Line 2) ' ET")
            .unwrap();

    assert_eq!(operators.len(), 7);
    assert_eq!(operators[0], ContentOperation::BeginText);

    if let ContentOperation::SetTextMatrix(a, b, c, d, e, f) = &operators[1] {
        let matrix = (*a, *b, *c, *d, *e, *f);
        assert_eq!(matrix, (1.0, 0.0, 0.0, 1.0, 100.0, 200.0));
    } else {
        panic!("Expected SetTextMatrix");
    }

    assert_eq!(operators[6], ContentOperation::EndText);
}
2458
/// A `q ... Q` pair around a transform and four line-style settings gives
/// seven operations in total.
#[test]
fn test_parser_graphics_state_operations() {
    let operators = ContentParser::parse(b"q 2 0 0 2 100 100 cm 5 w 1 J 2 j 10 M Q").unwrap();

    assert_eq!(operators.len(), 7);
    assert_eq!(operators[0], ContentOperation::SaveGraphicsState);

    if let ContentOperation::SetTransformMatrix(a, b, c, d, e, f) = &operators[1] {
        let matrix = (*a, *b, *c, *d, *e, *f);
        assert_eq!(matrix, (2.0, 0.0, 0.0, 2.0, 100.0, 100.0));
    } else {
        panic!("Expected SetTransformMatrix");
    }

    assert_eq!(operators[6], ContentOperation::RestoreGraphicsState);
}
2474
/// Three consecutive `Do` operators each produce a `PaintXObject` with the
/// given resource name.
#[test]
fn test_parser_xobject_operations() {
    let operators = ContentParser::parse(b"/Image1 Do /Form2 Do /Pattern3 Do").unwrap();
    let expected_names = ["Image1", "Form2", "Pattern3"];

    assert_eq!(operators.len(), 3);
    for (operation, expected_name) in operators.iter().zip(expected_names.iter()) {
        match operation {
            ContentOperation::PaintXObject(name) => assert_eq!(name, expected_name),
            _ => panic!("Expected PaintXObject"),
        }
    }
}
2488
/// `BMC ... EMC` around a `Tj` gives three operations; the tag name is kept.
#[test]
fn test_parser_marked_content_operations() {
    let operators = ContentParser::parse(b"/P BMC (Tagged content) Tj EMC").unwrap();

    assert_eq!(operators.len(), 3);

    if let ContentOperation::BeginMarkedContent(tag) = &operators[0] {
        assert_eq!(tag, "P");
    } else {
        panic!("Expected BeginMarkedContent");
    }

    assert_eq!(operators[2], ContentOperation::EndMarkedContent);
}
2501
/// Error-path checks: a missing operand and an unterminated hex string both
/// fail, while trailing operands with no operator are accepted.
#[test]
fn test_parser_error_handling_invalid_operators() {
    // `m` with no operands.
    assert!(ContentParser::parse(b"m").is_err());

    // Hex string that is never closed.
    assert!(ContentParser::parse(b"<ABC DEF BT").is_err());

    // Operands with no operator following them.
    assert!(ContentParser::parse(b"100 200 300").is_ok());
}
2519
/// Mixed whitespace between operands and operator does not affect parsing.
#[test]
fn test_parser_whitespace_tolerance() {
    let operators = ContentParser::parse(b" \n\t 100 \r\n 200 \t m \n").unwrap();
    let wanted = ContentOperation::MoveTo(100.0, 200.0);

    assert_eq!(operators.len(), 1);
    assert_eq!(operators[0], wanted);
}
2528
/// `%` comments are skipped, both between operands and at the end of the
/// stream.
#[test]
fn test_tokenizer_comment_handling() {
    let operators =
        ContentParser::parse(b"100 % This is a comment\n200 m % Another comment").unwrap();
    let wanted = ContentOperation::MoveTo(100.0, 200.0);

    assert_eq!(operators.len(), 1);
    assert_eq!(operators[0], wanted);
}
2537
/// A non-ASCII byte (`0xFF`) inside a comment does not disturb parsing of the
/// operations around it.
#[test]
fn test_parser_stream_with_binary_data() {
    let operators =
        ContentParser::parse(b"100 200 m % Comment with \xFF binary\n150 250 l").unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
    ];

    assert_eq!(operators.len(), 2);
    for (actual, wanted) in operators.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
2548
/// Parses a `m` followed by a `l`.
///
/// NOTE(review): despite the name, the input contains no array tokens.
#[test]
fn test_tokenizer_array_parsing() {
    let operators = ContentParser::parse(b"100 200 m 150 250 l").unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
    ];

    assert_eq!(operators.len(), 2);
    for (actual, wanted) in operators.iter().zip(expected.iter()) {
        assert_eq!(actual, wanted);
    }
}
2559
/// Two `re` operators yield two `Rectangle` operations with `(x, y, width,
/// height)` in stream order.
#[test]
fn test_parser_rectangle_operations() {
    let operators = ContentParser::parse(b"10 20 100 50 re 0 0 200 300 re").unwrap();
    assert_eq!(operators.len(), 2);

    let expected_rects = [(10.0, 20.0, 100.0, 50.0), (0.0, 0.0, 200.0, 300.0)];

    for (idx, wanted) in expected_rects.iter().enumerate() {
        if let ContentOperation::Rectangle(x, y, width, height) = &operators[idx] {
            assert_eq!((*x, *y, *width, *height), *wanted);
        } else {
            panic!("Expected Rectangle operation");
        }
    }
}
2579
/// `W n` and `W* n` after a rectangle give `Clip`/`ClipEvenOdd`, each followed
/// by `EndPath`. The rectangles at indices 0 and 3 are not inspected.
#[test]
fn test_parser_clipping_operations() {
    let operators = ContentParser::parse(b"100 100 50 50 re W n 200 200 75 75 re W* n").unwrap();
    assert_eq!(operators.len(), 6);

    let checks = [
        (1, ContentOperation::Clip),
        (2, ContentOperation::EndPath),
        (4, ContentOperation::ClipEvenOdd),
        (5, ContentOperation::EndPath),
    ];
    for (idx, wanted) in checks.iter() {
        assert_eq!(&operators[*idx], wanted);
    }
}
2591
/// Each operand-less path-painting operator is expected to map to its own
/// variant, in the order it appears in the stream.
#[test]
fn test_parser_painting_operations() {
    let parsed = ContentParser::parse(b"S s f f* B B* b b*").unwrap();

    // One expected variant per operator token, same order as the input.
    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
    ];

    assert_eq!(parsed.len(), 8);
    for (idx, wanted) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], wanted);
    }
}
2607
/// Checks the line-style operators `w`, `J`, `j`, `M` and `d`.
///
/// The first four operators are compared by value. The dash-pattern operator
/// (`d`) is checked by variant only, so this test does not pin the parsed
/// array contents or phase.
#[test]
fn test_parser_line_style_operations() {
    let content = b"5 w 1 J 2 j 10 M [ 3 2 ] 0 d";
    let operators = ContentParser::parse(content).unwrap();

    assert_eq!(operators.len(), 5);
    assert_eq!(operators[0], ContentOperation::SetLineWidth(5.0));
    assert_eq!(operators[1], ContentOperation::SetLineCap(1));
    assert_eq!(operators[2], ContentOperation::SetLineJoin(2));
    assert_eq!(operators[3], ContentOperation::SetMiterLimit(10.0));

    // Inspect the fifth operator too, so the length check above is not the
    // only thing covering the `d` operator.
    assert!(
        matches!(operators[4], ContentOperation::SetDashPattern(..)),
        "Expected SetDashPattern operation, got {:?}",
        operators[4]
    );
}
2620
/// The text-state operators `Tc`, `Tw`, `Tz`, `Tr` and `Ts` are each expected
/// to carry their single operand into the matching variant.
#[test]
fn test_parser_text_state_operations() {
    let parsed = ContentParser::parse(b"12 Tc 3 Tw 100 Tz 1 Tr 2 Ts").unwrap();

    // Expected variants in stream order; `Tr` takes an integer mode.
    let expected = [
        ContentOperation::SetCharSpacing(12.0),
        ContentOperation::SetWordSpacing(3.0),
        ContentOperation::SetHorizontalScaling(100.0),
        ContentOperation::SetTextRenderMode(1),
        ContentOperation::SetTextRise(2.0),
    ];

    assert_eq!(parsed.len(), 5);
    for (idx, wanted) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], wanted);
    }
}
2633
/// A literal string containing multi-byte UTF-8 sequences (the bytes for
/// `©` and `✓`) is expected to parse as one `ShowText` between `BeginText`
/// and `EndText`.
///
/// Only a loose lower bound on the payload length is checked, not the bytes.
#[test]
fn test_parser_unicode_text() {
    let stream = b"BT (Hello \xC2\xA9 World \xE2\x9C\x93) Tj ET";
    let parsed = ContentParser::parse(stream).unwrap();

    assert_eq!(parsed.len(), 3);
    assert_eq!(parsed[0], ContentOperation::BeginText);
    if let ContentOperation::ShowText(payload) = &parsed[1] {
        assert!(payload.len() > 5);
    } else {
        panic!("Expected ShowText operation");
    }
    assert_eq!(parsed[2], ContentOperation::EndText);
}
2649
/// Large-magnitude positive and negative operands to `c` are expected to
/// survive parsing to within a tolerance of 0.1.
///
/// Only the first control point and the x of the end point are inspected.
#[test]
fn test_parser_stress_test_large_coordinates() {
    let stream = b"999999.999 -999999.999 999999.999 -999999.999 999999.999 -999999.999 c";
    let parsed = ContentParser::parse(stream).unwrap();

    assert_eq!(parsed.len(), 1);
    match &parsed[0] {
        // The remaining three fields are intentionally not checked.
        ContentOperation::CurveTo(x1, y1, _, _, x3, _) => {
            assert!((*x1 - 999999.999).abs() < 0.1);
            assert!((*y1 - (-999999.999)).abs() < 0.1);
            assert!((*x3 - 999999.999).abs() < 0.1);
        }
        _ => panic!("Expected CurveTo operation"),
    }
}
2665
/// Both a zero-length stream and a whitespace-only stream are expected to
/// parse successfully into an empty operator list.
#[test]
fn test_parser_empty_content_stream() {
    // First the truly empty input, then one made only of space/newline/tab/CR.
    let blank_inputs: [&[u8]; 2] = [b"", b" \n\t\r "];

    for input in blank_inputs.iter() {
        let parsed = ContentParser::parse(*input).unwrap();
        assert!(parsed.is_empty());
    }
}
2676
/// Smoke test: a stray `0xFF` byte inside a `%` comment must not make the
/// parser panic.
///
/// Either `Ok` or `Err` is an acceptable outcome, so the result is discarded.
/// The test fails only if `parse` panics.
#[test]
fn test_tokenizer_error_recovery() {
    let content = b"100 200 m % Comment with\xFFbinary\n150 250 l";

    // Deliberately no assertion on the result: asserting
    // `is_ok() || is_err()` is always true and would only hide the fact that
    // this test checks for the absence of a panic.
    let _ = ContentParser::parse(content);
}
2685
/// Parses 1000 generated `m` operators and checks both the operator count and
/// that the parse completes in under 200 ms of wall-clock time.
#[test]
fn test_parser_optimization_repeated_operations() {
    // "<i> <2i> m " repeated for i in 0..1000, concatenated into one stream.
    let stream: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i * 2).into_bytes())
        .collect();

    let timer = std::time::Instant::now();
    let parsed = ContentParser::parse(&stream).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(parsed.len(), 1000);
    // NOTE(review): this is a wall-clock bound, so it may be sensitive to
    // machine load and build profile.
    assert!(elapsed < std::time::Duration::from_millis(200));
}
2701
/// A 10000-byte literal string is expected to come back from `Tj` as a
/// `ShowText` payload of exactly the same length.
#[test]
fn test_parser_memory_efficiency_large_strings() {
    let stream = format!("BT ({}) Tj ET", "A".repeat(10000));
    let parsed = ContentParser::parse(stream.as_bytes()).unwrap();

    assert_eq!(parsed.len(), 3);
    // The middle operator (between BT and ET) carries the string payload.
    if let ContentOperation::ShowText(payload) = &parsed[1] {
        assert_eq!(payload.len(), 10000);
    } else {
        panic!("Expected ShowText operation");
    }
}
2717 }
2718
/// Feeds 10000 generated `m` operators followed by one `S` through
/// `parse_content` and expects more than 10000 operations back.
///
/// NOTE(review): despite the "too_large" name, this test expects the parse
/// to succeed rather than be rejected.
#[test]
fn test_content_stream_too_large() {
    // "<i> <i> m " for i in 0..10000, then a trailing stroke operator.
    let mut stream: Vec<u8> = (0..10000)
        .flat_map(|i| format!("{} {} m ", i, i).into_bytes())
        .collect();
    stream.push(b'S');

    let outcome = ContentParser::parse_content(&stream);
    assert!(outcome.is_ok());

    let parsed = outcome.unwrap();
    assert!(parsed.len() > 10000);
}
2738
/// When a stream containing an unknown operator still parses, the valid `m`
/// that follows it must appear in the result.
///
/// A parse error is also accepted; nothing is asserted in that case.
#[test]
fn test_invalid_operator_handling() {
    let stream = b"100 200 INVALID_OP 300 400 m";

    match ContentParser::parse_content(stream) {
        Ok(parsed) => {
            let saw_move_to = parsed
                .iter()
                .any(|op| matches!(op, ContentOperation::MoveTo(_, _)));
            assert!(saw_move_to);
        }
        // Rejecting the whole stream is an acceptable outcome as well.
        Err(_) => {}
    }
}
2753
/// Smoke test: mismatched brackets and parentheses in a `TJ` operand must not
/// make the parser panic.
///
/// Either `Ok` or `Err` is an acceptable outcome, so the result is discarded.
/// The test fails only if `parse_content` panics.
#[test]
fn test_nested_arrays_malformed() {
    let content = b"[[(Hello] [World)]] TJ";

    // Deliberately no assertion on the result: asserting
    // `is_ok() || is_err()` is always true and verifies nothing.
    let _ = ContentParser::parse_content(content);
}
2763
/// Backslash escapes inside a literal string are expected to be decoded
/// before the bytes reach `ShowText`.
///
/// Each case is a raw literal-string token paired with the bytes it should
/// decode to.
#[test]
fn test_escape_sequences_in_strings() {
    let cases: [(&[u8], &[u8]); 5] = [
        // Named control-character escapes.
        (&b"(\\n\\r\\t)"[..], &b"\n\r\t"[..]),
        // Escaped backslash.
        (&b"(\\\\)"[..], &b"\\"[..]),
        // Escaped parentheses.
        (&b"(\\(\\))"[..], &b"()"[..]),
        // Three-digit octal: 0o123 is the byte for `S`.
        (&b"(\\123)"[..], &b"S"[..]),
        // One-digit octal: NUL.
        (&b"(\\0)"[..], &b"\0"[..]),
    ];

    for &(input, expected) in cases.iter() {
        // Append a `Tj` so the string is consumed by a show-text operator.
        let stream = [input, &b" Tj"[..]].concat();

        let outcome = ContentParser::parse_content(&stream);
        assert!(outcome.is_ok());

        let parsed = outcome.unwrap();
        match &parsed[0] {
            ContentOperation::ShowText(text) => {
                assert_eq!(text.as_slice(), expected, "Failed for input: {:?}", input);
            }
            _ => panic!("Expected ShowText operation"),
        }
    }
}
2791
/// Smoke test: a `BI` ... `ID` ... `EI` inline-image sequence with raw binary
/// payload bytes must not make the parser panic.
///
/// Either `Ok` or `Err` is an acceptable outcome, so the result is discarded.
/// The test fails only if `parse_content` panics.
#[test]
fn test_content_with_inline_images() {
    let content = b"BI /W 10 /H 10 /CS /RGB ID \x00\x01\x02\x03 EI";

    // Deliberately no assertion on the result: asserting
    // `is_ok() || is_err()` is always true and verifies nothing.
    let _ = ContentParser::parse_content(content);
}
2801
/// Smoke test: operators that appear with no operands before them must not
/// make the parser panic.
///
/// Either `Ok` or `Err` is an acceptable outcome for every case, so each
/// result is discarded. The test fails only if `parse_content` panics.
#[test]
fn test_operator_with_missing_operands() {
    // Each stream consists of a single operator token and nothing else.
    let test_cases: [&[u8]; 4] = [b"Tj", b"m", b"rg", b"Tf"];

    for &content in test_cases.iter() {
        // Deliberately no assertion on the result: asserting
        // `is_ok() || is_err()` is always true and verifies nothing.
        let _ = ContentParser::parse_content(content);
    }
}
2818}