1use super::{ParseError, ParseResult};
57use crate::objects::Object;
58use std::collections::HashMap;
59
60#[derive(Debug, Clone, PartialEq)]
94pub enum ContentOperation {
95 BeginText,
99
100 EndText,
103
104 SetCharSpacing(f32),
108
109 SetWordSpacing(f32),
112
113 SetHorizontalScaling(f32),
116
117 SetLeading(f32),
120
121 SetFont(String, f32),
124
125 SetTextRenderMode(i32),
128
129 SetTextRise(f32),
132
133 MoveText(f32, f32),
137
138 MoveTextSetLeading(f32, f32),
141
142 SetTextMatrix(f32, f32, f32, f32, f32, f32),
145
146 NextLine,
149
150 ShowText(Vec<u8>),
154
155 ShowTextArray(Vec<TextElement>),
158
159 NextLineShowText(Vec<u8>),
162
163 SetSpacingNextLineShowText(f32, f32, Vec<u8>),
166
167 SaveGraphicsState,
171
172 RestoreGraphicsState,
175
176 SetTransformMatrix(f32, f32, f32, f32, f32, f32),
179
180 SetLineWidth(f32),
182
183 SetLineCap(i32),
186
187 SetLineJoin(i32),
190
191 SetMiterLimit(f32),
194
195 SetDashPattern(Vec<f32>, f32),
198
199 SetIntent(String),
202
203 SetFlatness(f32),
206
207 SetGraphicsStateParams(String),
210
211 MoveTo(f32, f32),
214
215 LineTo(f32, f32),
217
218 CurveTo(f32, f32, f32, f32, f32, f32),
221
222 CurveToV(f32, f32, f32, f32),
224
225 CurveToY(f32, f32, f32, f32),
227
228 ClosePath,
231
232 Rectangle(f32, f32, f32, f32),
235
236 Stroke,
239
240 CloseStroke,
243
244 Fill,
246
247 FillEvenOdd,
249
250 FillStroke,
253
254 FillStrokeEvenOdd,
256
257 CloseFillStroke,
260
261 CloseFillStrokeEvenOdd,
263
264 EndPath,
267
268 Clip, ClipEvenOdd, SetStrokingColorSpace(String),
276
277 SetNonStrokingColorSpace(String),
280
281 SetStrokingColor(Vec<f32>),
284
285 SetNonStrokingColor(Vec<f32>),
288
289 SetStrokingGray(f32),
292
293 SetNonStrokingGray(f32),
295
296 SetStrokingRGB(f32, f32, f32),
299
300 SetNonStrokingRGB(f32, f32, f32),
302
303 SetStrokingCMYK(f32, f32, f32, f32),
305
306 SetNonStrokingCMYK(f32, f32, f32, f32),
308
309 ShadingFill(String), BeginInlineImage,
315 InlineImage {
317 params: HashMap<String, Object>,
319 data: Vec<u8>,
321 },
322
323 PaintXObject(String),
327
328 BeginMarkedContent(String), BeginMarkedContentWithProps(String, HashMap<String, String>), EndMarkedContent, DefineMarkedContentPoint(String), DefineMarkedContentPointWithProps(String, HashMap<String, String>), BeginCompatibility, EndCompatibility, }
339
/// One entry of the array operand consumed by the `TJ` operator.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// Raw bytes of a string entry (literal or hexadecimal string).
    Text(Vec<u8>),
    /// A numeric entry; integer entries are widened to `f32`.
    Spacing(f32),
}
366
367#[derive(Debug, Clone, PartialEq)]
369pub(super) enum Token {
370 Number(f32),
371 Integer(i32),
372 String(Vec<u8>),
373 HexString(Vec<u8>),
374 Name(String),
375 Operator(String),
376 ArrayStart,
377 ArrayEnd,
378 DictStart,
379 DictEnd,
380}
381
/// Byte-level lexer over a content stream.
///
/// Borrows the input for `'a` and keeps a cursor into it; no data is copied until a token
/// is produced.
pub struct ContentTokenizer<'a> {
    /// The complete content stream being tokenized.
    input: &'a [u8],
    /// Index into `input` of the next unread byte.
    position: usize,
}
387
388impl<'a> ContentTokenizer<'a> {
389 pub fn new(input: &'a [u8]) -> Self {
391 Self { input, position: 0 }
392 }
393
394 pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
396 self.skip_whitespace();
397
398 if self.position >= self.input.len() {
399 return Ok(None);
400 }
401
402 let ch = self.input[self.position];
403
404 match ch {
405 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
407
408 b'(' => self.read_literal_string(),
410 b'<' => {
411 if self.peek_next() == Some(b'<') {
412 self.position += 2;
413 Ok(Some(Token::DictStart))
414 } else {
415 self.read_hex_string()
416 }
417 }
418 b'>' => {
419 if self.peek_next() == Some(b'>') {
420 self.position += 2;
421 Ok(Some(Token::DictEnd))
422 } else {
423 Err(ParseError::SyntaxError {
424 position: self.position,
425 message: "Unexpected '>'".to_string(),
426 })
427 }
428 }
429
430 b'[' => {
432 self.position += 1;
433 Ok(Some(Token::ArrayStart))
434 }
435 b']' => {
436 self.position += 1;
437 Ok(Some(Token::ArrayEnd))
438 }
439
440 b'/' => self.read_name(),
442
443 b';' => {
445 self.position += 1;
446 self.next_token() }
448
449 _ => self.read_operator(),
451 }
452 }
453
454 fn skip_whitespace(&mut self) {
455 while self.position < self.input.len() {
456 match self.input[self.position] {
457 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
458 b'%' => self.skip_comment(),
459 _ => break,
460 }
461 }
462 }
463
464 fn skip_comment(&mut self) {
465 while self.position < self.input.len() && self.input[self.position] != b'\n' {
466 self.position += 1;
467 }
468 }
469
470 fn peek_next(&self) -> Option<u8> {
471 if self.position + 1 < self.input.len() {
472 Some(self.input[self.position + 1])
473 } else {
474 None
475 }
476 }
477
478 fn read_number(&mut self) -> ParseResult<Option<Token>> {
479 let start = self.position;
480 let mut has_dot = false;
481
482 if self.position < self.input.len()
484 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
485 {
486 self.position += 1;
487 }
488
489 while self.position < self.input.len() {
491 match self.input[self.position] {
492 b'0'..=b'9' => self.position += 1,
493 b'.' if !has_dot => {
494 has_dot = true;
495 self.position += 1;
496 }
497 _ => break,
498 }
499 }
500
501 let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
502 ParseError::SyntaxError {
503 position: start,
504 message: "Invalid number format".to_string(),
505 }
506 })?;
507
508 if has_dot {
509 let value = num_str
510 .parse::<f32>()
511 .map_err(|_| ParseError::SyntaxError {
512 position: start,
513 message: "Invalid float number".to_string(),
514 })?;
515 Ok(Some(Token::Number(value)))
516 } else {
517 let value = num_str
518 .parse::<i32>()
519 .map_err(|_| ParseError::SyntaxError {
520 position: start,
521 message: "Invalid integer number".to_string(),
522 })?;
523 Ok(Some(Token::Integer(value)))
524 }
525 }
526
527 fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
528 self.position += 1; let mut result = Vec::new();
530 let mut paren_depth = 1;
531 let mut escape = false;
532
533 while self.position < self.input.len() && paren_depth > 0 {
534 let ch = self.input[self.position];
535 self.position += 1;
536
537 if escape {
538 match ch {
539 b'n' => result.push(b'\n'),
540 b'r' => result.push(b'\r'),
541 b't' => result.push(b'\t'),
542 b'b' => result.push(b'\x08'),
543 b'f' => result.push(b'\x0C'),
544 b'(' => result.push(b'('),
545 b')' => result.push(b')'),
546 b'\\' => result.push(b'\\'),
547 b'0'..=b'7' => {
548 self.position -= 1;
550 let octal_value = self.read_octal_escape()?;
551 result.push(octal_value);
552 }
553 _ => result.push(ch), }
555 escape = false;
556 } else {
557 match ch {
558 b'\\' => escape = true,
559 b'(' => {
560 paren_depth += 1;
561 result.push(ch);
562 }
563 b')' => {
564 paren_depth -= 1;
565 if paren_depth > 0 {
566 result.push(ch);
567 }
568 }
569 _ => result.push(ch),
570 }
571 }
572 }
573
574 Ok(Some(Token::String(result)))
575 }
576
577 fn read_octal_escape(&mut self) -> ParseResult<u8> {
578 let mut value = 0u8;
579 let mut count = 0;
580
581 while count < 3 && self.position < self.input.len() {
582 match self.input[self.position] {
583 b'0'..=b'7' => {
584 value = value * 8 + (self.input[self.position] - b'0');
585 self.position += 1;
586 count += 1;
587 }
588 _ => break,
589 }
590 }
591
592 Ok(value)
593 }
594
595 fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
596 self.position += 1; let mut result = Vec::new();
598 let mut nibble = None;
599
600 while self.position < self.input.len() {
601 let ch = self.input[self.position];
602
603 match ch {
604 b'>' => {
605 self.position += 1;
606 if let Some(n) = nibble {
608 result.push(n << 4);
609 }
610 return Ok(Some(Token::HexString(result)));
611 }
612 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
613 let digit = if ch <= b'9' {
614 ch - b'0'
615 } else if ch <= b'F' {
616 ch - b'A' + 10
617 } else {
618 ch - b'a' + 10
619 };
620
621 if let Some(n) = nibble {
622 result.push((n << 4) | digit);
623 nibble = None;
624 } else {
625 nibble = Some(digit);
626 }
627 self.position += 1;
628 }
629 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
630 self.position += 1;
632 }
633 _ => {
634 return Err(ParseError::SyntaxError {
635 position: self.position,
636 message: format!("Invalid character in hex string: {:?}", ch as char),
637 });
638 }
639 }
640 }
641
642 Err(ParseError::SyntaxError {
643 position: self.position,
644 message: "Unterminated hex string".to_string(),
645 })
646 }
647
648 fn read_name(&mut self) -> ParseResult<Option<Token>> {
649 self.position += 1; let start = self.position;
651
652 while self.position < self.input.len() {
653 let ch = self.input[self.position];
654 match ch {
655 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
656 | b']' | b'{' | b'}' | b'/' | b'%' => break,
657 b'#' => {
658 self.position += 1;
660 if self.position + 1 < self.input.len() {
661 self.position += 2;
662 }
663 }
664 _ => self.position += 1,
665 }
666 }
667
668 let name_bytes = &self.input[start..self.position];
669 let name = self.decode_name(name_bytes)?;
670 Ok(Some(Token::Name(name)))
671 }
672
673 fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
674 let mut result = Vec::new();
675 let mut i = 0;
676
677 while i < bytes.len() {
678 if bytes[i] == b'#' && i + 2 < bytes.len() {
679 let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
681 ParseError::SyntaxError {
682 position: self.position,
683 message: "Invalid hex escape in name".to_string(),
684 }
685 })?;
686 let value =
687 u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
688 position: self.position,
689 message: "Invalid hex escape in name".to_string(),
690 })?;
691 result.push(value);
692 i += 3;
693 } else {
694 result.push(bytes[i]);
695 i += 1;
696 }
697 }
698
699 String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
700 position: self.position,
701 message: "Invalid UTF-8 in name".to_string(),
702 })
703 }
704
705 fn read_operator(&mut self) -> ParseResult<Option<Token>> {
706 let start = self.position;
707
708 while self.position < self.input.len() {
709 let ch = self.input[self.position];
710 match ch {
711 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
712 | b']' | b'{' | b'}' | b'/' | b'%' | b';' => break,
713 _ => self.position += 1,
714 }
715 }
716
717 let op_bytes = &self.input[start..self.position];
718 let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
719 position: start,
720 message: "Invalid operator".to_string(),
721 })?;
722
723 Ok(Some(Token::Operator(op.to_string())))
724 }
725}
726
727pub struct ContentParser {
746 tokens: Vec<Token>,
747 position: usize,
748}
749
750impl ContentParser {
751 pub fn new(_content: &[u8]) -> Self {
753 Self {
754 tokens: Vec::new(),
755 position: 0,
756 }
757 }
758
759 pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
793 Self::parse_content(content)
794 }
795
796 pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
801 let mut tokenizer = ContentTokenizer::new(content);
802 let mut tokens = Vec::new();
803
804 while let Some(token) = tokenizer.next_token()? {
806 tokens.push(token);
807 }
808
809 let mut parser = Self {
810 tokens,
811 position: 0,
812 };
813
814 parser.parse_operators()
815 }
816
817 fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
818 let mut operators = Vec::new();
819 let mut operand_stack: Vec<Token> = Vec::new();
820
821 while self.position < self.tokens.len() {
822 let token = self.tokens[self.position].clone();
823 self.position += 1;
824
825 match &token {
826 Token::Operator(op) => {
827 let operator = self.parse_operator(op, &mut operand_stack)?;
828 operators.push(operator);
829 }
830 _ => {
831 operand_stack.push(token);
833 }
834 }
835 }
836
837 Ok(operators)
838 }
839
840 fn parse_operator(
841 &mut self,
842 op: &str,
843 operands: &mut Vec<Token>,
844 ) -> ParseResult<ContentOperation> {
845 let operator = match op {
846 "BT" => ContentOperation::BeginText,
848 "ET" => ContentOperation::EndText,
849
850 "Tc" => {
852 let spacing = self.pop_number(operands)?;
853 ContentOperation::SetCharSpacing(spacing)
854 }
855 "Tw" => {
856 let spacing = self.pop_number(operands)?;
857 ContentOperation::SetWordSpacing(spacing)
858 }
859 "Tz" => {
860 let scale = self.pop_number(operands)?;
861 ContentOperation::SetHorizontalScaling(scale)
862 }
863 "TL" => {
864 let leading = self.pop_number(operands)?;
865 ContentOperation::SetLeading(leading)
866 }
867 "Tf" => {
868 let size = self.pop_number(operands)?;
869 let font = self.pop_name(operands)?;
870 ContentOperation::SetFont(font, size)
871 }
872 "Tr" => {
873 let mode = self.pop_integer(operands)?;
874 ContentOperation::SetTextRenderMode(mode)
875 }
876 "Ts" => {
877 let rise = self.pop_number(operands)?;
878 ContentOperation::SetTextRise(rise)
879 }
880
881 "Td" => {
883 let ty = self.pop_number(operands)?;
884 let tx = self.pop_number(operands)?;
885 ContentOperation::MoveText(tx, ty)
886 }
887 "TD" => {
888 let ty = self.pop_number(operands)?;
889 let tx = self.pop_number(operands)?;
890 ContentOperation::MoveTextSetLeading(tx, ty)
891 }
892 "Tm" => {
893 let f = self.pop_number(operands)?;
894 let e = self.pop_number(operands)?;
895 let d = self.pop_number(operands)?;
896 let c = self.pop_number(operands)?;
897 let b = self.pop_number(operands)?;
898 let a = self.pop_number(operands)?;
899 ContentOperation::SetTextMatrix(a, b, c, d, e, f)
900 }
901 "T*" => ContentOperation::NextLine,
902
903 "Tj" => {
905 let text = self.pop_string(operands)?;
906 ContentOperation::ShowText(text)
907 }
908 "TJ" => {
909 let array = self.pop_array(operands)?;
910 let elements = self.parse_text_array(array)?;
911 ContentOperation::ShowTextArray(elements)
912 }
913 "'" => {
914 let text = self.pop_string(operands)?;
915 ContentOperation::NextLineShowText(text)
916 }
917 "\"" => {
918 let text = self.pop_string(operands)?;
919 let aw = self.pop_number(operands)?;
920 let ac = self.pop_number(operands)?;
921 ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
922 }
923
924 "q" => ContentOperation::SaveGraphicsState,
926 "Q" => ContentOperation::RestoreGraphicsState,
927 "cm" => {
928 let f = self.pop_number(operands)?;
929 let e = self.pop_number(operands)?;
930 let d = self.pop_number(operands)?;
931 let c = self.pop_number(operands)?;
932 let b = self.pop_number(operands)?;
933 let a = self.pop_number(operands)?;
934 ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
935 }
936 "w" => {
937 let width = self.pop_number(operands)?;
938 ContentOperation::SetLineWidth(width)
939 }
940 "J" => {
941 let cap = self.pop_integer(operands)?;
942 ContentOperation::SetLineCap(cap)
943 }
944 "j" => {
945 let join = self.pop_integer(operands)?;
946 ContentOperation::SetLineJoin(join)
947 }
948 "M" => {
949 let limit = self.pop_number(operands)?;
950 ContentOperation::SetMiterLimit(limit)
951 }
952 "d" => {
953 let phase = self.pop_number(operands)?;
954 let array = self.pop_array(operands)?;
955 let pattern = self.parse_dash_array(array)?;
956 ContentOperation::SetDashPattern(pattern, phase)
957 }
958 "ri" => {
959 let intent = self.pop_name(operands)?;
960 ContentOperation::SetIntent(intent)
961 }
962 "i" => {
963 let flatness = self.pop_number(operands)?;
964 ContentOperation::SetFlatness(flatness)
965 }
966 "gs" => {
967 let name = self.pop_name(operands)?;
968 ContentOperation::SetGraphicsStateParams(name)
969 }
970
971 "m" => {
973 let y = self.pop_number(operands)?;
974 let x = self.pop_number(operands)?;
975 ContentOperation::MoveTo(x, y)
976 }
977 "l" => {
978 let y = self.pop_number(operands)?;
979 let x = self.pop_number(operands)?;
980 ContentOperation::LineTo(x, y)
981 }
982 "c" => {
983 let y3 = self.pop_number(operands)?;
984 let x3 = self.pop_number(operands)?;
985 let y2 = self.pop_number(operands)?;
986 let x2 = self.pop_number(operands)?;
987 let y1 = self.pop_number(operands)?;
988 let x1 = self.pop_number(operands)?;
989 ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
990 }
991 "v" => {
992 let y3 = self.pop_number(operands)?;
993 let x3 = self.pop_number(operands)?;
994 let y2 = self.pop_number(operands)?;
995 let x2 = self.pop_number(operands)?;
996 ContentOperation::CurveToV(x2, y2, x3, y3)
997 }
998 "y" => {
999 let y3 = self.pop_number(operands)?;
1000 let x3 = self.pop_number(operands)?;
1001 let y1 = self.pop_number(operands)?;
1002 let x1 = self.pop_number(operands)?;
1003 ContentOperation::CurveToY(x1, y1, x3, y3)
1004 }
1005 "h" => ContentOperation::ClosePath,
1006 "re" => {
1007 let height = self.pop_number(operands)?;
1008 let width = self.pop_number(operands)?;
1009 let y = self.pop_number(operands)?;
1010 let x = self.pop_number(operands)?;
1011 ContentOperation::Rectangle(x, y, width, height)
1012 }
1013
1014 "S" => ContentOperation::Stroke,
1016 "s" => ContentOperation::CloseStroke,
1017 "f" | "F" => ContentOperation::Fill,
1018 "f*" => ContentOperation::FillEvenOdd,
1019 "B" => ContentOperation::FillStroke,
1020 "B*" => ContentOperation::FillStrokeEvenOdd,
1021 "b" => ContentOperation::CloseFillStroke,
1022 "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1023 "n" => ContentOperation::EndPath,
1024
1025 "W" => ContentOperation::Clip,
1027 "W*" => ContentOperation::ClipEvenOdd,
1028
1029 "CS" => {
1031 let name = self.pop_name(operands)?;
1032 ContentOperation::SetStrokingColorSpace(name)
1033 }
1034 "cs" => {
1035 let name = self.pop_name(operands)?;
1036 ContentOperation::SetNonStrokingColorSpace(name)
1037 }
1038 "SC" | "SCN" => {
1039 let components = self.pop_color_components(operands)?;
1040 ContentOperation::SetStrokingColor(components)
1041 }
1042 "sc" | "scn" => {
1043 let components = self.pop_color_components(operands)?;
1044 ContentOperation::SetNonStrokingColor(components)
1045 }
1046 "G" => {
1047 let gray = self.pop_number(operands)?;
1048 ContentOperation::SetStrokingGray(gray)
1049 }
1050 "g" => {
1051 let gray = self.pop_number(operands)?;
1052 ContentOperation::SetNonStrokingGray(gray)
1053 }
1054 "RG" => {
1055 let b = self.pop_number(operands)?;
1056 let g = self.pop_number(operands)?;
1057 let r = self.pop_number(operands)?;
1058 ContentOperation::SetStrokingRGB(r, g, b)
1059 }
1060 "rg" => {
1061 let b = self.pop_number(operands)?;
1062 let g = self.pop_number(operands)?;
1063 let r = self.pop_number(operands)?;
1064 ContentOperation::SetNonStrokingRGB(r, g, b)
1065 }
1066 "K" => {
1067 let k = self.pop_number(operands)?;
1068 let y = self.pop_number(operands)?;
1069 let m = self.pop_number(operands)?;
1070 let c = self.pop_number(operands)?;
1071 ContentOperation::SetStrokingCMYK(c, m, y, k)
1072 }
1073 "k" => {
1074 let k = self.pop_number(operands)?;
1075 let y = self.pop_number(operands)?;
1076 let m = self.pop_number(operands)?;
1077 let c = self.pop_number(operands)?;
1078 ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1079 }
1080
1081 "sh" => {
1083 let name = self.pop_name(operands)?;
1084 ContentOperation::ShadingFill(name)
1085 }
1086
1087 "Do" => {
1089 let name = self.pop_name(operands)?;
1090 ContentOperation::PaintXObject(name)
1091 }
1092
1093 "BMC" => {
1095 let tag = self.pop_name(operands)?;
1096 ContentOperation::BeginMarkedContent(tag)
1097 }
1098 "BDC" => {
1099 let props = self.pop_dict_or_name(operands)?;
1100 let tag = self.pop_name(operands)?;
1101 ContentOperation::BeginMarkedContentWithProps(tag, props)
1102 }
1103 "EMC" => ContentOperation::EndMarkedContent,
1104 "MP" => {
1105 let tag = self.pop_name(operands)?;
1106 ContentOperation::DefineMarkedContentPoint(tag)
1107 }
1108 "DP" => {
1109 let props = self.pop_dict_or_name(operands)?;
1110 let tag = self.pop_name(operands)?;
1111 ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1112 }
1113
1114 "BX" => ContentOperation::BeginCompatibility,
1116 "EX" => ContentOperation::EndCompatibility,
1117
1118 "BI" => {
1120 operands.clear(); self.parse_inline_image()?
1122 }
1123
1124 _ => {
1125 return Err(ParseError::SyntaxError {
1126 position: self.position,
1127 message: format!("Unknown operator: {op}"),
1128 });
1129 }
1130 };
1131
1132 operands.clear(); Ok(operator)
1134 }
1135
1136 fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1138 match operands.pop() {
1139 Some(Token::Number(n)) => Ok(n),
1140 Some(Token::Integer(i)) => Ok(i as f32),
1141 _ => Err(ParseError::SyntaxError {
1142 position: self.position,
1143 message: "Expected number operand".to_string(),
1144 }),
1145 }
1146 }
1147
1148 fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1149 match operands.pop() {
1150 Some(Token::Integer(i)) => Ok(i),
1151 _ => Err(ParseError::SyntaxError {
1152 position: self.position,
1153 message: "Expected integer operand".to_string(),
1154 }),
1155 }
1156 }
1157
1158 fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1159 match operands.pop() {
1160 Some(Token::Name(n)) => Ok(n),
1161 _ => Err(ParseError::SyntaxError {
1162 position: self.position,
1163 message: "Expected name operand".to_string(),
1164 }),
1165 }
1166 }
1167
1168 fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1169 match operands.pop() {
1170 Some(Token::String(s)) => Ok(s),
1171 Some(Token::HexString(s)) => Ok(s),
1172 _ => Err(ParseError::SyntaxError {
1173 position: self.position,
1174 message: "Expected string operand".to_string(),
1175 }),
1176 }
1177 }
1178
1179 fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1180 let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
1182 if has_array_end {
1183 operands.pop(); }
1185
1186 let mut array = Vec::new();
1187 let mut found_start = false;
1188
1189 while let Some(token) = operands.pop() {
1191 match token {
1192 Token::ArrayStart => {
1193 found_start = true;
1194 break;
1195 }
1196 Token::ArrayEnd => {
1197 continue;
1199 }
1200 _ => array.push(token),
1201 }
1202 }
1203
1204 if !found_start {
1205 return Err(ParseError::SyntaxError {
1206 position: self.position,
1207 message: "Expected array".to_string(),
1208 });
1209 }
1210
1211 array.reverse(); Ok(array)
1213 }
1214
1215 fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1216 if let Some(token) = operands.pop() {
1217 match token {
1218 Token::Name(name) => {
1219 let mut props = HashMap::new();
1222 props.insert("__resource_ref".to_string(), name);
1223 Ok(props)
1224 }
1225 Token::DictStart => {
1226 let mut props = HashMap::new();
1228
1229 while let Some(value_token) = operands.pop() {
1231 if matches!(value_token, Token::DictEnd) {
1232 break;
1233 }
1234
1235 if let Token::Name(key) = value_token {
1237 if let Some(value_token) = operands.pop() {
1238 let value = match value_token {
1239 Token::Name(name) => name,
1240 Token::String(s) => String::from_utf8_lossy(&s).to_string(),
1241 Token::Integer(i) => i.to_string(),
1242 Token::Number(f) => f.to_string(),
1243 _ => continue, };
1245 props.insert(key, value);
1246 }
1247 }
1248 }
1249
1250 Ok(props)
1251 }
1252 _ => {
1253 Ok(HashMap::new())
1255 }
1256 }
1257 } else {
1258 Err(ParseError::SyntaxError {
1260 position: 0,
1261 message: "Expected dictionary or name for marked content properties".to_string(),
1262 })
1263 }
1264 }
1265
1266 fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1267 let mut components = Vec::new();
1268
1269 while let Some(token) = operands.last() {
1271 match token {
1272 Token::Number(n) => {
1273 components.push(*n);
1274 operands.pop();
1275 }
1276 Token::Integer(i) => {
1277 components.push(*i as f32);
1278 operands.pop();
1279 }
1280 _ => break,
1281 }
1282 }
1283
1284 components.reverse();
1285 Ok(components)
1286 }
1287
1288 fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1289 let mut elements = Vec::new();
1290
1291 for token in tokens {
1292 match token {
1293 Token::String(s) | Token::HexString(s) => {
1294 elements.push(TextElement::Text(s));
1295 }
1296 Token::Number(n) => {
1297 elements.push(TextElement::Spacing(n));
1298 }
1299 Token::Integer(i) => {
1300 elements.push(TextElement::Spacing(i as f32));
1301 }
1302 _ => {
1303 return Err(ParseError::SyntaxError {
1304 position: self.position,
1305 message: "Invalid element in text array".to_string(),
1306 });
1307 }
1308 }
1309 }
1310
1311 Ok(elements)
1312 }
1313
1314 fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1315 let mut pattern = Vec::new();
1316
1317 for token in tokens {
1318 match token {
1319 Token::Number(n) => pattern.push(n),
1320 Token::Integer(i) => pattern.push(i as f32),
1321 _ => {
1322 return Err(ParseError::SyntaxError {
1323 position: self.position,
1324 message: "Invalid element in dash array".to_string(),
1325 });
1326 }
1327 }
1328 }
1329
1330 Ok(pattern)
1331 }
1332
1333 fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1334 let mut params = HashMap::new();
1336
1337 while self.position < self.tokens.len() {
1338 if let Token::Operator(op) = &self.tokens[self.position] {
1340 if op == "ID" {
1341 self.position += 1;
1342 break;
1343 }
1344 }
1345
1346 if let Token::Name(key) = &self.tokens[self.position] {
1351 self.position += 1;
1352 if self.position >= self.tokens.len() {
1353 break;
1354 }
1355
1356 let value = match &self.tokens[self.position] {
1358 Token::Integer(n) => Object::Integer(*n as i64),
1359 Token::Number(n) => Object::Real(*n as f64),
1360 Token::Name(s) => Object::Name(expand_inline_name(s)),
1361 Token::String(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1362 Token::HexString(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1363 _ => Object::Null,
1364 };
1365
1366 let full_key = expand_inline_key(key);
1368 params.insert(full_key, value);
1369 self.position += 1;
1370 } else {
1371 self.position += 1;
1372 }
1373 }
1374
1375 let mut data = Vec::new();
1378
1379 while self.position < self.tokens.len() {
1385 if let Token::Operator(op) = &self.tokens[self.position] {
1386 if op == "EI" {
1387 self.position += 1;
1388 break;
1389 }
1390 }
1391
1392 match &self.tokens[self.position] {
1394 Token::String(bytes) => data.extend_from_slice(bytes),
1395 Token::HexString(bytes) => data.extend_from_slice(bytes),
1396 Token::Integer(n) => data.extend_from_slice(n.to_string().as_bytes()),
1397 Token::Number(n) => data.extend_from_slice(n.to_string().as_bytes()),
1398 Token::Name(s) => data.extend_from_slice(s.as_bytes()),
1399 Token::Operator(s) if s != "EI" => data.extend_from_slice(s.as_bytes()),
1400 _ => {}
1401 }
1402 self.position += 1;
1403 }
1404
1405 Ok(ContentOperation::InlineImage { params, data })
1406 }
1407}
1408
/// Maps an abbreviated inline-image dictionary key to its full name.
///
/// Keys that are not a known abbreviation (including the full names themselves) are
/// returned unchanged.
fn expand_inline_key(key: &str) -> String {
    let full_name = match key {
        "W" => "Width",
        "H" => "Height",
        "CS" => "ColorSpace",
        "BPC" => "BitsPerComponent",
        "F" => "Filter",
        "DP" => "DecodeParms",
        "IM" => "ImageMask",
        "I" => "Interpolate",
        "D" => "Decode",
        unabbreviated => unabbreviated,
    };
    full_name.to_string()
}
1425
/// Maps an abbreviated inline-image name value (colour space or filter) to its full name.
///
/// Names that are not a known abbreviation are returned unchanged.
fn expand_inline_name(name: &str) -> String {
    let full_name = match name {
        // Colour spaces
        "G" => "DeviceGray",
        "RGB" => "DeviceRGB",
        "CMYK" => "DeviceCMYK",
        "I" => "Indexed",
        // Filters
        "AHx" => "ASCIIHexDecode",
        "A85" => "ASCII85Decode",
        "LZW" => "LZWDecode",
        "Fl" => "FlateDecode",
        "RL" => "RunLengthDecode",
        "DCT" => "DCTDecode",
        "CCF" => "CCITTFaxDecode",
        unabbreviated => unabbreviated,
    };
    full_name.to_string()
}
1443
1444#[cfg(test)]
1445mod tests {
1446 use super::*;
1447
/// Integers, negative numbers and reals (with and without a leading digit) tokenize to the
/// matching numeric token kinds, followed by end of input.
#[test]
fn test_tokenize_numbers() {
    let mut tokenizer = ContentTokenizer::new(b"123 -45 3.14159 -0.5 .5");

    let expected = vec![
        Token::Integer(123),
        Token::Integer(-45),
        Token::Number(3.14159),
        Token::Number(-0.5),
        Token::Number(0.5),
    ];

    for token in expected {
        assert_eq!(tokenizer.next_token().unwrap(), Some(token));
    }
    assert_eq!(tokenizer.next_token().unwrap(), None);
}
1463
/// Literal strings: plain text, an escaped newline, and balanced nested parentheses.
#[test]
fn test_tokenize_strings() {
    let mut tokenizer =
        ContentTokenizer::new(b"(Hello World) (Hello\\nWorld) (Nested (paren))");

    let expected: Vec<&[u8]> = vec![b"Hello World", b"Hello\nWorld", b"Nested (paren)"];

    for bytes in expected {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(bytes.to_vec()))
        );
    }
}
1482
/// Hexadecimal strings decode to the same bytes with or without white-space between digits.
#[test]
fn test_tokenize_hex_strings() {
    let mut tokenizer = ContentTokenizer::new(b"<48656C6C6F> <48 65 6C 6C 6F>");

    for _ in 0..2 {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::HexString(b"Hello".to_vec()))
        );
    }
}
1497
/// Names lose their leading slash and have `#xx` escapes decoded.
#[test]
fn test_tokenize_names() {
    let mut tokenizer = ContentTokenizer::new(b"/Name /Name#20with#20spaces /A#42C");

    for expected in ["Name", "Name with spaces", "ABC"] {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Name(expected.to_string()))
        );
    }
}
1516
/// Bare keywords separated by spaces become operator tokens, in order.
#[test]
fn test_tokenize_operators() {
    let mut tokenizer = ContentTokenizer::new(b"BT Tj ET q Q");

    for keyword in ["BT", "Tj", "ET", "q", "Q"] {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Operator(keyword.to_string()))
        );
    }
}
1543
/// A minimal text object parses into exactly five operations.
#[test]
fn test_parse_text_operators() {
    let parsed =
        ContentParser::parse(b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET").unwrap();

    let expected = vec![
        ContentOperation::BeginText,
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::MoveText(100.0, 200.0),
        ContentOperation::ShowText(b"Hello World".to_vec()),
        ContentOperation::EndText,
    ];

    assert_eq!(parsed.len(), 5);
    assert_eq!(parsed, expected);
}
1562
/// Save/restore, a transform, a line width, a rectangle and a stroke parse into six
/// operations.
#[test]
fn test_parse_graphics_operators() {
    let parsed = ContentParser::parse(b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q").unwrap();

    let expected = vec![
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
        ContentOperation::SetLineWidth(2.0),
        ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0),
        ContentOperation::Stroke,
        ContentOperation::RestoreGraphicsState,
    ];

    assert_eq!(parsed.len(), 6);
    assert_eq!(parsed, expected);
}
1582
/// The non-stroking gray, RGB and CMYK operators keep their operands in written order.
#[test]
fn test_parse_color_operators() {
    let parsed = ContentParser::parse(b"0.5 g 1 0 0 rg 0 0 0 1 k").unwrap();

    let expected = vec![
        ContentOperation::SetNonStrokingGray(0.5),
        ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0),
    ];

    assert_eq!(parsed.len(), 3);
    assert_eq!(parsed, expected);
}
1599
1600 mod comprehensive_tests {
1602 use super::*;
1603
/// Walks through the text-state and text-positioning operators inside one
/// `BT … ET` block and checks each parsed operation by position.
#[test]
fn test_all_text_operators() {
    let stream = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
    let ops = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::BeginText,
        ContentOperation::SetCharSpacing(5.0),
        ContentOperation::SetWordSpacing(10.0),
        ContentOperation::SetHorizontalScaling(120.0),
        ContentOperation::SetLeading(15.0),
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::SetTextRenderMode(1),
        ContentOperation::SetTextRise(5.0),
        ContentOperation::MoveText(100.0, 200.0),
        ContentOperation::MoveTextSetLeading(50.0, 150.0),
        ContentOperation::NextLine,
        ContentOperation::ShowText(b"Hello".to_vec()),
        ContentOperation::EndText,
    ];

    // Indexing (rather than zipping) keeps the panic-on-short-output behaviour.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&ops[idx], want);
    }
}
1630
/// Checks the graphics-state operators (`q`, `Q`, `cm`, `w`, `J`, `j`, `M`,
/// `gs`, `i`, `ri`) one by one against the parsed output.
#[test]
fn test_all_graphics_state_operators() {
    let stream = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
    let ops = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
        ContentOperation::SetLineWidth(2.0),
        ContentOperation::SetLineCap(1),
        ContentOperation::SetLineJoin(2),
        ContentOperation::SetMiterLimit(10.0),
        ContentOperation::SetGraphicsStateParams("GS1".to_string()),
        ContentOperation::SetFlatness(0.5),
        ContentOperation::SetIntent("Perceptual".to_string()),
    ];

    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&ops[idx], want);
    }
}
1657
/// Checks the path-construction operators `m`, `l`, `c`, `v`, `y`, `h` and `re`.
#[test]
fn test_all_path_construction_operators() {
    let stream = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
    let ops = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0),
        ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0),
        ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0),
        ContentOperation::ClosePath,
        ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0),
    ];

    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&ops[idx], want);
    }
}
1683
/// Checks the path-painting and clipping operators.
///
/// Both `f` and `F` are expected to produce `ContentOperation::Fill`.
#[test]
fn test_all_path_painting_operators() {
    let ops = ContentParser::parse(b"S s f F f* B B* b b* n W W*").unwrap();

    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
        ContentOperation::EndPath,
        ContentOperation::Clip,
        ContentOperation::ClipEvenOdd,
    ];

    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&ops[idx], want);
    }
}
1702
/// Checks the colour-space, gray, RGB, CMYK and shading operators in both
/// their stroking (upper-case) and non-stroking (lower-case) forms.
#[test]
fn test_all_color_operators() {
    let stream = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
    let ops = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string()),
        ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string()),
        ContentOperation::SetStrokingGray(0.7),
        ContentOperation::SetNonStrokingGray(0.4),
        ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0),
        ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0),
        ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5),
        ContentOperation::ShadingFill("Shade1".to_string()),
    ];

    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&ops[idx], want);
    }
}
1740
/// Checks `Do`, the marked-content operators (`BMC`, `EMC`, `MP`) and the
/// compatibility-section operators (`BX`, `EX`).
#[test]
fn test_xobject_and_marked_content_operators() {
    let stream = b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX";
    let ops = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::PaintXObject("Image1".to_string()),
        ContentOperation::BeginMarkedContent("MC1".to_string()),
        ContentOperation::EndMarkedContent,
        ContentOperation::DefineMarkedContentPoint("MP1".to_string()),
        ContentOperation::BeginCompatibility,
        ContentOperation::EndCompatibility,
    ];

    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&ops[idx], want);
    }
}
1763
/// Parses a text object nested inside a saved/transformed graphics state and
/// checks all eight resulting operations.
#[test]
fn test_complex_content_stream() {
    let stream = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
    let ops = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0),
        ContentOperation::BeginText,
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::MoveText(0.0, 0.0),
        ContentOperation::ShowText(b"Complex".to_vec()),
        ContentOperation::EndText,
        ContentOperation::RestoreGraphicsState,
    ];

    assert_eq!(ops.len(), expected.len());
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&ops[idx], want);
    }
}
1788
/// Tokens separated by spaces, tabs, LF and CR must come out unchanged, and
/// trailing whitespace must lead to end of input (`None`).
#[test]
fn test_tokenizer_whitespace_handling() {
    let mut lexer = ContentTokenizer::new(b" \t\n\r BT \t\n /F1 12.5 \t Tf \n\r ET ");

    let expected = [
        Token::Operator("BT".to_string()),
        Token::Name("F1".to_string()),
        Token::Number(12.5),
        Token::Operator("Tf".to_string()),
        Token::Operator("ET".to_string()),
    ];
    for want in expected {
        assert_eq!(lexer.next_token().unwrap(), Some(want));
    }

    assert_eq!(lexer.next_token().unwrap(), None);
}
1813
/// Numeric lexing corner cases: bare zero, leading/trailing decimal point and
/// explicit signs.
#[test]
fn test_tokenizer_edge_cases() {
    let mut lexer = ContentTokenizer::new(b"0 .5 -.5 +.5 123. .123 1.23 -1.23");

    // The first token is the only integer; every following one is a real.
    assert_eq!(lexer.next_token().unwrap(), Some(Token::Integer(0)));

    for want in [0.5, -0.5, 0.5, 123.0, 0.123, 1.23, -1.23] {
        assert_eq!(lexer.next_token().unwrap(), Some(Token::Number(want)));
    }
}
1829
/// Literal-string escape handling: backslash, parentheses and the `\n`, `\t`,
/// `\r`, `\b`, `\f` escapes.
///
/// The input also contains two octal-style strings at the end; only the first
/// ten tokens are asserted.
#[test]
fn test_string_parsing_edge_cases() {
    let source = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
    let mut lexer = ContentTokenizer::new(source);

    let decoded: [&[u8]; 10] = [
        b"Simple",
        b"With\\backslash",
        b"With)paren",
        b"With\newline",
        b"With\ttab",
        b"With\rcarriage",
        b"With\x08backspace",
        b"With\x0Cformfeed",
        b"With(leftparen",
        b"With)rightparen",
    ];

    for bytes in decoded.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::String(bytes.to_vec()))
        );
    }
}
1876
/// Hex strings: compact form, whitespace between digits, a longer string, and
/// an odd digit count (the final lone `5` is expected to decode as `0x50`).
#[test]
fn test_hex_string_parsing() {
    let mut lexer =
        ContentTokenizer::new(b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>");

    let decoded: [&[u8]; 4] = [b"Hello", b"Hello", b"HelloW", b"Hello\x50"];

    for bytes in decoded.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::HexString(bytes.to_vec()))
        );
    }
}
1899
/// Name tokens with `#xx` hex escapes must be decoded to the literal
/// characters (space, `#`, `/`, and an escaped leading letter).
#[test]
fn test_name_parsing_edge_cases() {
    let source =
        b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
    let mut lexer = ContentTokenizer::new(source);

    let decoded = [
        "Name",
        "Name with spaces",
        "Name#with#hash",
        "Name/with/slash",
        "EmptyName",
    ];

    for name in decoded.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::Name(name.to_string()))
        );
    }
}
1926
/// Repeated and unbalanced-looking operator runs are parsed one-to-one; the
/// parser is not expected to validate nesting here.
#[test]
fn test_operator_parsing_edge_cases() {
    let ops = ContentParser::parse(b"q q q Q Q Q BT BT ET ET").unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::BeginText,
        ContentOperation::BeginText,
        ContentOperation::EndText,
        ContentOperation::EndText,
    ];

    assert_eq!(ops.len(), expected.len());
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&ops[idx], want);
    }
}
1944
/// `Td` given a single operand must be rejected.
#[test]
fn test_error_handling_insufficient_operands() {
    let outcome = ContentParser::parse(b"100 Td");
    assert!(outcome.is_err());
}
1951
/// An unrecognised operator keyword must make parsing fail.
#[test]
fn test_error_handling_invalid_operator() {
    let outcome = ContentParser::parse(b"100 200 INVALID");
    assert!(outcome.is_err());
}
1958
/// An unterminated literal string must not make the tokenizer panic.
///
/// Whether the tokenizer reports an error or returns the partial string is
/// intentionally left unspecified, so the result itself is not inspected.
/// The previous `is_ok() || is_err()` assertion was always true and has been
/// replaced by an explicit "call and discard".
#[test]
fn test_error_handling_malformed_string() {
    let mut tokenizer = ContentTokenizer::new(b"(Unclosed string");

    // Either `Ok` or `Err` is acceptable; only a panic fails this test.
    let _ = tokenizer.next_token();
}
1969
/// A hex string containing a non-hex digit (`G`) must be rejected.
#[test]
fn test_error_handling_malformed_hex_string() {
    let mut lexer = ContentTokenizer::new(b"<48656C6C6G>");
    assert!(lexer.next_token().is_err());
}
1977
/// A name whose `#` escape is followed by non-hex characters must be rejected.
#[test]
fn test_error_handling_malformed_name() {
    let mut lexer = ContentTokenizer::new(b"/Name#GG");
    assert!(lexer.next_token().is_err());
}
1985
/// An empty stream parses successfully and yields no operations.
#[test]
fn test_empty_content_stream() {
    let parsed = ContentParser::parse(b"").unwrap();
    assert_eq!(0, parsed.len());
}
1992
/// A stream made only of whitespace parses successfully and yields no
/// operations.
#[test]
fn test_whitespace_only_content_stream() {
    let parsed = ContentParser::parse(b" \t\n\r ").unwrap();
    assert_eq!(0, parsed.len());
}
1999
/// Integer operands are accepted where coordinates are expected and surface
/// as floating-point values in the parsed operations.
#[test]
fn test_mixed_integer_and_real_operands() {
    let ops = ContentParser::parse(b"100 200 m 150 200 l").unwrap();

    assert_eq!(ops.len(), 2);
    let (first, second) = (&ops[0], &ops[1]);
    assert_eq!(*first, ContentOperation::MoveTo(100.0, 200.0));
    assert_eq!(*second, ContentOperation::LineTo(150.0, 200.0));
}
2010
/// Negative integer and real operands keep their sign through parsing.
#[test]
fn test_negative_operands() {
    let ops = ContentParser::parse(b"-100 -200 Td -50.5 -75.2 TD").unwrap();

    assert_eq!(ops.len(), 2);
    let (first, second) = (&ops[0], &ops[1]);
    assert_eq!(*first, ContentOperation::MoveText(-100.0, -200.0));
    assert_eq!(*second, ContentOperation::MoveTextSetLeading(-50.5, -75.2));
}
2023
/// Operands with many significant digits parse to the same `f32` values as
/// the corresponding Rust literals.
#[test]
fn test_large_numbers() {
    let ops = ContentParser::parse(b"999999.999999 -999999.999999 m").unwrap();

    let expected = ContentOperation::MoveTo(999999.999999, -999999.999999);
    assert_eq!(ops.len(), 1);
    assert_eq!(ops[0], expected);
}
2035
/// Parses plain decimal operands for `m`.
///
/// NOTE(review): despite the test name, the input contains no exponent
/// notation; only ordinary decimals are exercised here.
#[test]
fn test_scientific_notation() {
    let ops = ContentParser::parse(b"123.45 -456.78 m").unwrap();

    let expected = ContentOperation::MoveTo(123.45, -456.78);
    assert_eq!(ops.len(), 1);
    assert_eq!(ops[0], expected);
}
2045
/// `TJ` given a plain string instead of an array must be rejected.
#[test]
fn test_show_text_array_complex() {
    let outcome = ContentParser::parse(b"(Hello) TJ");
    assert!(outcome.is_err());
}
2054
/// `d` with only a phase operand and no dash array must be rejected.
#[test]
fn test_dash_pattern_empty() {
    let outcome = ContentParser::parse(b"0 d");
    assert!(outcome.is_err());
}
2063
/// `d` with a single real operand and no dash array must be rejected.
#[test]
fn test_dash_pattern_complex() {
    let outcome = ContentParser::parse(b"2.5 d");
    assert!(outcome.is_err());
}
2072
/// `pop_array` must consume the whole bracketed run from the operand stack,
/// both when a trailing `ArrayEnd` is present and when it is missing.
#[test]
fn test_pop_array_removes_array_end() {
    let parser = ContentParser::new(b"");

    // Case 1: the stack still carries the closing bracket.
    let mut with_end = vec![
        Token::ArrayStart,
        Token::Integer(1),
        Token::Integer(2),
        Token::Integer(3),
        Token::ArrayEnd,
    ];
    let items = parser.pop_array(&mut with_end).unwrap();
    assert_eq!(items.len(), 3);
    assert!(with_end.is_empty());

    // Case 2: the closing bracket is not on the stack.
    let mut without_end = vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)];
    let items = parser.pop_array(&mut without_end).unwrap();
    assert_eq!(items.len(), 2);
    assert!(without_end.is_empty());
}
2096
/// `parse_dash_array` converts mixed real/integer tokens to `f32` values and
/// accepts an empty token list.
#[test]
fn test_dash_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    let mixed = vec![Token::Number(3.0), Token::Integer(2)];
    let dashes = parser.parse_dash_array(mixed).unwrap();
    assert_eq!(dashes, vec![3.0, 2.0]);

    let none = parser.parse_dash_array(Vec::new()).unwrap();
    assert_eq!(none, Vec::<f32>::new());
}
2113
/// `parse_text_array` yields one element per input token for a
/// string / number / string sequence.
#[test]
fn test_text_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    let tokens = vec![
        Token::String(b"Hello".to_vec()),
        Token::Number(-100.0),
        Token::String(b"World".to_vec()),
    ];

    let elements = parser.parse_text_array(tokens).unwrap();
    assert_eq!(elements.len(), 3);
}
2128
/// A `BI … ID … EI` sequence becomes a single `InlineImage` operation whose
/// abbreviated keys and colour-space name are expanded to their full forms.
#[test]
fn test_inline_image_handling() {
    let stream = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
    let ops = ContentParser::parse(stream).unwrap();
    assert_eq!(ops.len(), 1);

    let params = match &ops[0] {
        ContentOperation::InlineImage { params, .. } => params,
        _ => panic!("Expected InlineImage operation"),
    };

    let expected = [
        ("Width", Object::Integer(100)),
        ("Height", Object::Integer(100)),
        ("BitsPerComponent", Object::Integer(8)),
        ("ColorSpace", Object::Name("DeviceRGB".to_string())),
    ];
    for (key, want) in expected.iter() {
        assert_eq!(params.get(*key), Some(want));
    }
}
2150
/// Inline image with a filter entry: `/F /AHx` is expected to surface as
/// `Filter = ASCIIHexDecode`, and `/CS /G` as `ColorSpace = DeviceGray`.
#[test]
fn test_inline_image_with_filter() {
    let stream = b"BI /W 50 /H 50 /CS /G /BPC 1 /F /AHx ID 00FF00FF EI";
    let ops = ContentParser::parse(stream).unwrap();
    assert_eq!(ops.len(), 1);

    let params = match &ops[0] {
        ContentOperation::InlineImage { params, .. } => params,
        _ => panic!("Expected InlineImage operation"),
    };

    let expected = [
        ("Width", Object::Integer(50)),
        ("Height", Object::Integer(50)),
        ("ColorSpace", Object::Name("DeviceGray".to_string())),
        ("BitsPerComponent", Object::Integer(1)),
        ("Filter", Object::Name("ASCIIHexDecode".to_string())),
    ];
    for (key, want) in expected.iter() {
        assert_eq!(params.get(*key), Some(want));
    }
}
2174
/// Parses 1000 `m` operations and requires the parse to finish in under
/// 100 ms of wall-clock time.
#[test]
fn test_content_parser_performance() {
    let stream: Vec<u8> = (0..1000)
        .map(|i| format!("{} {} m ", i, i + 1))
        .collect::<String>()
        .into_bytes();

    let timer = std::time::Instant::now();
    let ops = ContentParser::parse(&stream).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(ops.len(), 1000);
    assert!(elapsed < std::time::Duration::from_millis(100));
}
2189
/// Tokenizes 2000 integers and requires tokenizer construction plus the full
/// scan to finish in under 50 ms of wall-clock time.
#[test]
fn test_tokenizer_performance() {
    let input: Vec<u8> = (0..1000)
        .map(|i| format!("{} {} ", i, i + 1))
        .collect::<String>()
        .into_bytes();

    let timer = std::time::Instant::now();
    let mut lexer = ContentTokenizer::new(&input);
    let mut seen = 0;
    loop {
        match lexer.next_token().unwrap() {
            Some(_) => seen += 1,
            None => break,
        }
    }
    let elapsed = timer.elapsed();

    assert_eq!(seen, 2000);
    assert!(elapsed < std::time::Duration::from_millis(50));
}
2208
/// Parses a stream of 10 000 `c` operators and checks that every parsed
/// operation is a `CurveTo`.
///
/// The per-operation check used to be a bare `matches!` whose boolean result
/// was discarded, so it verified nothing; it is now wrapped in `assert!`.
#[test]
fn test_memory_usage_large_content() {
    let mut content = Vec::new();
    for i in 0..10000 {
        content.extend_from_slice(
            format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5).as_bytes(),
        );
    }

    let operators = ContentParser::parse(&content).unwrap();
    assert_eq!(operators.len(), 10000);

    for op in operators {
        assert!(
            matches!(op, ContentOperation::CurveTo(_, _, _, _, _, _)),
            "every parsed operation should be a CurveTo"
        );
    }
}
2227
/// Runs the parser on ten threads over one shared buffer and checks that each
/// thread gets the same five-operation result.
#[test]
fn test_concurrent_parsing() {
    use std::sync::Arc;
    use std::thread;

    let shared = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());

    // Spawn every worker before joining any of them.
    let mut workers = Vec::with_capacity(10);
    for _ in 0..10 {
        let bytes = Arc::clone(&shared);
        workers.push(thread::spawn(move || ContentParser::parse(&bytes).unwrap()));
    }

    for worker in workers {
        let ops = worker.join().unwrap();
        assert_eq!(ops.len(), 5);
        assert_eq!(ops[0], ContentOperation::BeginText);
        assert_eq!(ops[4], ContentOperation::EndText);
    }
}
2248
/// Hex-string corner cases: empty `<>`, an odd number of digits (the missing
/// low nibble is expected to be zero), and embedded whitespace.
#[test]
fn test_tokenizer_hex_string_edge_cases() {
    let empty = ContentTokenizer::new(b"<>").next_token().unwrap().unwrap();
    assert_eq!(empty, Token::HexString(Vec::new()));

    let odd = ContentTokenizer::new(b"<123>")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(odd, Token::HexString(vec![0x12, 0x30]));

    let spaced = ContentTokenizer::new(b"<12 34\t56\n78>")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(spaced, Token::HexString(vec![0x12, 0x34, 0x56, 0x78]));
}
2276
/// Literal strings: single-character escapes and three-digit octal escapes
/// decode to the corresponding bytes.
#[test]
fn test_tokenizer_literal_string_escape_sequences() {
    let escaped = ContentTokenizer::new(b"(\\n\\r\\t\\b\\f\\(\\)\\\\)")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(
        escaped,
        Token::String(vec![b'\n', b'\r', b'\t', 0x08, 0x0C, b'(', b')', b'\\'])
    );

    let octal = ContentTokenizer::new(b"(\\101\\040\\377)")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(octal, Token::String(vec![b'A', b' ', 255]));
}
2300
/// Balanced, unescaped parentheses inside a literal string are kept as part
/// of the string, at one and at several nesting levels.
#[test]
fn test_tokenizer_nested_parentheses() {
    let single = ContentTokenizer::new(b"(outer (inner) text)")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(single, Token::String(b"outer (inner) text".to_vec()));

    let deep = ContentTokenizer::new(b"(level1 (level2 (level3) back2) back1)")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(
        deep,
        Token::String(b"level1 (level2 (level3) back2) back1".to_vec())
    );
}
2322
/// `#xx` escapes in names decode to spaces and to delimiter characters.
#[test]
fn test_tokenizer_name_hex_escapes() {
    let spaced = ContentTokenizer::new(b"/Name#20With#20Spaces")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(spaced, Token::Name("Name With Spaces".to_string()));

    let special = ContentTokenizer::new(b"/Special#2F#28#29#3C#3E")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(special, Token::Name("Special/()<>".to_string()));
}
2340
/// Number lexing at the extremes: a large integer, a very small real, and a
/// real with no leading digit.
#[test]
fn test_tokenizer_number_edge_cases() {
    let big = ContentTokenizer::new(b"2147483647")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(big, Token::Integer(2147483647));

    let tiny = match ContentTokenizer::new(b"0.00001").next_token().unwrap().unwrap() {
        Token::Number(value) => value,
        _ => panic!("Expected small float"),
    };
    assert!((tiny - 0.00001).abs() < f32::EPSILON);

    let half = match ContentTokenizer::new(b".5").next_token().unwrap().unwrap() {
        Token::Number(value) => value,
        _ => panic!("Expected float starting with dot"),
    };
    assert!((half - 0.5).abs() < f32::EPSILON);
}
2367
/// A closed, filled rectangle drawn with `m`/`l`/`h`/`f` parses into the
/// matching six operations.
#[test]
fn test_parser_complex_path_operations() {
    let ops = ContentParser::parse(b"100 200 m 150 200 l 150 250 l 100 250 l h f").unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
        ContentOperation::LineTo(100.0, 250.0),
        ContentOperation::ClosePath,
        ContentOperation::Fill,
    ];

    assert_eq!(ops.len(), expected.len());
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&ops[idx], want);
    }
}
2381
/// A single `c` operator yields one `CurveTo` whose control points are finite
/// and whose first control point lies within the 50..=200 range.
#[test]
fn test_parser_bezier_curves() {
    let ops = ContentParser::parse(b"100 100 150 50 200 150 c").unwrap();
    assert_eq!(ops.len(), 1);

    let points = match &ops[0] {
        ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3) => [*x1, *y1, *x2, *y2, *x3, *y3],
        _ => panic!("Expected CurveTo operation"),
    };

    assert!(points.iter().all(|coord| coord.is_finite()));

    // Only the first control point is range-checked.
    let (x1, y1) = (points[0], points[1]);
    assert!((50.0..=200.0).contains(&x1));
    assert!((50.0..=200.0).contains(&y1));
}
2403
/// Five colour operators are parsed; the first two (gray and RGB) are checked
/// for exact values.
#[test]
fn test_parser_color_operations() {
    let stream = b"0.5 g 1 0 0 rg 0 1 0 1 k /DeviceRGB cs 0.2 0.4 0.6 sc";
    let ops = ContentParser::parse(stream).unwrap();

    assert_eq!(ops.len(), 5);
    assert_eq!(ops[0], ContentOperation::SetNonStrokingGray(0.5));
    assert_eq!(ops[1], ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0));
}
2421
/// Text matrix, leading and the `'` (next line + show) operator inside one
/// text object: seven operations, of which the first, second and last are
/// checked.
#[test]
fn test_parser_text_positioning_advanced() {
    let stream = b"BT 1 0 0 1 100 200 Tm 0 TL 10 TL (Line 1) ' (Line 2) ' ET";
    let ops = ContentParser::parse(stream).unwrap();

    assert_eq!(ops.len(), 7);
    assert_eq!(ops[0], ContentOperation::BeginText);
    assert_eq!(
        ops[1],
        ContentOperation::SetTextMatrix(1.0, 0.0, 0.0, 1.0, 100.0, 200.0)
    );
    assert_eq!(ops[6], ContentOperation::EndText);
}
2437
/// Graphics-state block with a scale/translate matrix: seven operations, of
/// which the first, second and last are checked.
#[test]
fn test_parser_graphics_state_operations() {
    let stream = b"q 2 0 0 2 100 100 cm 5 w 1 J 2 j 10 M Q";
    let ops = ContentParser::parse(stream).unwrap();

    assert_eq!(ops.len(), 7);
    assert_eq!(ops[0], ContentOperation::SaveGraphicsState);
    assert_eq!(
        ops[1],
        ContentOperation::SetTransformMatrix(2.0, 0.0, 0.0, 2.0, 100.0, 100.0)
    );
    assert_eq!(ops[6], ContentOperation::RestoreGraphicsState);
}
2453
/// Three consecutive `Do` operators each produce a `PaintXObject` carrying
/// the preceding name.
#[test]
fn test_parser_xobject_operations() {
    let ops = ContentParser::parse(b"/Image1 Do /Form2 Do /Pattern3 Do").unwrap();

    assert_eq!(ops.len(), 3);
    assert_eq!(ops[0], ContentOperation::PaintXObject("Image1".to_string()));
    assert_eq!(ops[1], ContentOperation::PaintXObject("Form2".to_string()));
    assert_eq!(ops[2], ContentOperation::PaintXObject("Pattern3".to_string()));
}
2467
/// A `BMC … EMC` pair wrapping a `Tj` yields three operations; the tag and
/// the closing operation are checked.
#[test]
fn test_parser_marked_content_operations() {
    let ops = ContentParser::parse(b"/P BMC (Tagged content) Tj EMC").unwrap();

    assert_eq!(ops.len(), 3);
    assert_eq!(ops[0], ContentOperation::BeginMarkedContent("P".to_string()));
    assert_eq!(ops[2], ContentOperation::EndMarkedContent);
}
2480
/// Error-path checks for the parser: two malformed inputs must fail, while
/// operands with no operator are accepted.
#[test]
fn test_parser_error_handling_invalid_operators() {
    // `m` with no operands at all.
    assert!(ContentParser::parse(b"m").is_err());

    // Hex string that is never closed with `>`.
    assert!(ContentParser::parse(b"<ABC DEF BT").is_err());

    // Operands that are never followed by an operator still parse.
    assert!(ContentParser::parse(b"100 200 300").is_ok());
}
2498
/// Mixed spaces, tabs and line endings between tokens do not affect parsing.
#[test]
fn test_parser_whitespace_tolerance() {
    let ops = ContentParser::parse(b" \n\t 100 \r\n 200 \t m \n").unwrap();

    let expected = ContentOperation::MoveTo(100.0, 200.0);
    assert_eq!(ops.len(), 1);
    assert_eq!(ops[0], expected);
}
2507
/// `%` comments, both mid-stream and at the very end, are skipped and do not
/// disturb the operands on either side.
#[test]
fn test_tokenizer_comment_handling() {
    let stream = b"100 % This is a comment\n200 m % Another comment";
    let ops = ContentParser::parse(stream).unwrap();

    let expected = ContentOperation::MoveTo(100.0, 200.0);
    assert_eq!(ops.len(), 1);
    assert_eq!(ops[0], expected);
}
2516
/// A non-ASCII byte (`0xFF`) inside a comment must not break parsing of the
/// surrounding operators.
#[test]
fn test_parser_stream_with_binary_data() {
    let stream = b"100 200 m % Comment with \xFF binary\n150 250 l";
    let ops = ContentParser::parse(stream).unwrap();

    assert_eq!(ops.len(), 2);
    let (first, second) = (&ops[0], &ops[1]);
    assert_eq!(*first, ContentOperation::MoveTo(100.0, 200.0));
    assert_eq!(*second, ContentOperation::LineTo(150.0, 250.0));
}
2527
/// Parses a simple move-to / line-to pair.
///
/// NOTE(review): despite the test name, the input contains no array tokens.
#[test]
fn test_tokenizer_array_parsing() {
    let ops = ContentParser::parse(b"100 200 m 150 250 l").unwrap();

    assert_eq!(ops.len(), 2);
    let (first, second) = (&ops[0], &ops[1]);
    assert_eq!(*first, ContentOperation::MoveTo(100.0, 200.0));
    assert_eq!(*second, ContentOperation::LineTo(150.0, 250.0));
}
2538
/// Two `re` operators yield two `Rectangle` operations with their operands
/// in x, y, width, height order.
#[test]
fn test_parser_rectangle_operations() {
    let ops = ContentParser::parse(b"10 20 100 50 re 0 0 200 300 re").unwrap();

    assert_eq!(ops.len(), 2);
    assert_eq!(ops[0], ContentOperation::Rectangle(10.0, 20.0, 100.0, 50.0));
    assert_eq!(ops[1], ContentOperation::Rectangle(0.0, 0.0, 200.0, 300.0));
}
2558
/// Two rectangle-clip sequences (`re W n` and `re W* n`): the clip and
/// end-path operations are checked by position; the rectangles are not.
#[test]
fn test_parser_clipping_operations() {
    let stream = b"100 100 50 50 re W n 200 200 75 75 re W* n";
    let ops = ContentParser::parse(stream).unwrap();

    assert_eq!(ops.len(), 6);

    let checks = [
        (1usize, ContentOperation::Clip),
        (2, ContentOperation::EndPath),
        (4, ContentOperation::ClipEvenOdd),
        (5, ContentOperation::EndPath),
    ];
    for (idx, want) in checks.iter() {
        assert_eq!(&ops[*idx], want);
    }
}
2570
/// The eight stroke/fill painting operators map one-to-one onto their
/// `ContentOperation` variants.
#[test]
fn test_parser_painting_operations() {
    let ops = ContentParser::parse(b"S s f f* B B* b b*").unwrap();

    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
    ];

    assert_eq!(ops.len(), expected.len());
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&ops[idx], want);
    }
}
2586
/// Checks the line-style operators `w`, `J`, `j` and `M`.
///
/// Five operations are expected in total, but only the first four are
/// inspected; the one produced by the trailing `[ 3 2 ] 0 d` is counted
/// without being compared against a specific value.
#[test]
fn test_parser_line_style_operations() {
    let stream = b"5 w 1 J 2 j 10 M [ 3 2 ] 0 d";
    let parsed = ContentParser::parse(stream).unwrap();

    assert_eq!(parsed.len(), 5);

    let leading = [
        ContentOperation::SetLineWidth(5.0),
        ContentOperation::SetLineCap(1),
        ContentOperation::SetLineJoin(2),
        ContentOperation::SetMiterLimit(10.0),
    ];
    for (index, want) in leading.iter().enumerate() {
        assert_eq!(parsed[index], *want);
    }
}
2599
/// Checks that the text-state operators `Tc`, `Tw`, `Tz`, `Tr` and `Ts`
/// are parsed into their matching variants with the operand supplied.
#[test]
fn test_parser_text_state_operations() {
    let stream = b"12 Tc 3 Tw 100 Tz 1 Tr 2 Ts";
    let parsed = ContentParser::parse(stream).unwrap();

    // Listed in the same order as the operators appear in `stream`.
    let expected = [
        ContentOperation::SetCharSpacing(12.0),
        ContentOperation::SetWordSpacing(3.0),
        ContentOperation::SetHorizontalScaling(100.0),
        ContentOperation::SetTextRenderMode(1),
        ContentOperation::SetTextRise(2.0),
    ];

    assert_eq!(parsed.len(), 5);
    for (index, want) in expected.iter().enumerate() {
        assert_eq!(parsed[index], *want);
    }
}
2612
/// A string literal containing non-ASCII bytes must still be parsed as a
/// single `ShowText` between `BeginText` and `EndText`.
///
/// Only a lower bound on the payload length is checked, not its exact bytes.
#[test]
fn test_parser_unicode_text() {
    let stream = b"BT (Hello \xC2\xA9 World \xE2\x9C\x93) Tj ET";
    let parsed = ContentParser::parse(stream).unwrap();

    assert_eq!(parsed.len(), 3);
    assert_eq!(parsed[0], ContentOperation::BeginText);

    if let ContentOperation::ShowText(bytes) = &parsed[1] {
        assert!(bytes.len() > 5);
    } else {
        panic!("Expected ShowText operation");
    }

    assert_eq!(parsed[2], ContentOperation::EndText);
}
2628
/// Large-magnitude positive and negative operands must survive parsing of a
/// `c` operator to within a tolerance of 0.1.
///
/// Only the first control point and the x coordinate of the end point are
/// compared; the remaining operands are ignored.
#[test]
fn test_parser_stress_test_large_coordinates() {
    let stream = b"999999.999 -999999.999 999999.999 -999999.999 999999.999 -999999.999 c";
    let parsed = ContentParser::parse(stream).unwrap();

    assert_eq!(parsed.len(), 1);

    let big: f32 = 999999.999;
    match &parsed[0] {
        ContentOperation::CurveTo(x1, y1, _, _, x3, _) => {
            assert!((*x1 - big).abs() < 0.1);
            assert!((*y1 - (-big)).abs() < 0.1);
            assert!((*x3 - big).abs() < 0.1);
        }
        _ => panic!("Expected CurveTo operation"),
    }
}
2644
/// Both a zero-length stream and a whitespace-only stream must parse
/// successfully and yield no operations.
#[test]
fn test_parser_empty_content_stream() {
    let blank_inputs: [&[u8]; 2] = [&b""[..], &b" \n\t\r "[..]];

    for &stream in blank_inputs.iter() {
        let parsed = ContentParser::parse(stream).unwrap();
        assert!(parsed.is_empty());
    }
}
2655
/// Smoke test: a comment containing a raw `0xFF` byte must not make the
/// parser panic.
///
/// Whether the parser returns `Ok` or `Err` for this input is deliberately
/// left unspecified. The previous `assert!(is_ok() || is_err())` was a
/// tautology that could never fail, so it has been replaced by simply
/// running the parser and discarding the result.
#[test]
fn test_tokenizer_error_recovery() {
    let content = b"100 200 m % Comment with\xFFbinary\n150 250 l";

    // The test fails only if this call panics; either outcome is accepted.
    let _outcome = ContentParser::parse(content);
}
2664
/// Parses 1000 generated `m` operators and checks both the resulting count
/// and that parsing finishes in under 200 ms.
///
/// The time bound is measured with the wall clock around the parse call
/// only; stream generation is excluded from the measurement.
#[test]
fn test_parser_optimization_repeated_operations() {
    use std::time::Instant;

    // Builds "0 0 m 1 2 m 2 4 m ..." as raw bytes.
    let stream: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i * 2).into_bytes())
        .collect();

    let started = Instant::now();
    let parsed = ContentParser::parse(&stream).unwrap();
    let elapsed = started.elapsed();

    assert_eq!(parsed.len(), 1000);
    assert!(elapsed.as_millis() < 200);
}
2680
/// A 10000-byte string literal must come back from the parser as a single
/// `ShowText` whose payload has exactly that length.
#[test]
fn test_parser_memory_efficiency_large_strings() {
    let payload_len = 10000;
    let stream = format!("BT ({}) Tj ET", "A".repeat(payload_len));

    let parsed = ContentParser::parse(stream.as_bytes()).unwrap();
    assert_eq!(parsed.len(), 3);

    if let ContentOperation::ShowText(bytes) = &parsed[1] {
        assert_eq!(bytes.len(), payload_len);
    } else {
        panic!("Expected ShowText operation");
    }
}
2696 }
2697
/// Parses a stream of 10000 `m` operators followed by a single `S` and
/// checks that more than 10000 operations come back.
///
/// NOTE(review): despite the name, this asserts that parsing *succeeds*;
/// no size limit is exercised here.
#[test]
fn test_content_stream_too_large() {
    let mut stream: Vec<u8> = (0..10000)
        .flat_map(|i| format!("{} {} m ", i, i).into_bytes())
        .collect();
    stream.extend_from_slice(b"S");

    let result = ContentParser::parse_content(&stream);
    assert!(result.is_ok());

    let parsed_count = result.unwrap().len();
    assert!(parsed_count > 10000);
}
2717
/// If a stream containing an unknown operator parses successfully, the
/// valid `m` operator after it must still be present in the output.
///
/// An `Err` result is accepted without any further checks.
#[test]
fn test_invalid_operator_handling() {
    let stream = b"100 200 INVALID_OP 300 400 m";

    if let Ok(operations) = ContentParser::parse_content(stream) {
        let saw_move_to = operations
            .iter()
            .any(|op| matches!(op, ContentOperation::MoveTo(_, _)));
        assert!(saw_move_to);
    }
}
2732
/// Smoke test: a `TJ` operand with mismatched brackets and parentheses must
/// not make the parser panic.
///
/// Whether the result is `Ok` or `Err` is deliberately left unspecified.
/// The previous `assert!(is_ok() || is_err())` was a tautology that could
/// never fail, so it has been replaced by simply running the parser and
/// discarding the result.
#[test]
fn test_nested_arrays_malformed() {
    let content = b"[[(Hello] [World)]] TJ";

    // The test fails only if this call panics; either outcome is accepted.
    let _outcome = ContentParser::parse_content(content);
}
2742
/// Table-driven check of backslash escapes inside string literals.
///
/// Each case is a string literal which, followed by ` Tj`, must parse to a
/// leading `ShowText` whose bytes equal the decoded form.
#[test]
fn test_escape_sequences_in_strings() {
    // (string literal as written in the stream, bytes expected after decoding)
    let cases: Vec<(&[u8], &[u8])> = vec![
        (&b"(\\n\\r\\t)"[..], &b"\n\r\t"[..]),
        (&b"(\\\\)"[..], &b"\\"[..]),
        (&b"(\\(\\))"[..], &b"()"[..]),
        // Octal 123 is the byte for ASCII 'S'.
        (&b"(\\123)"[..], &b"S"[..]),
        (&b"(\\0)"[..], &b"\0"[..]),
    ];

    for (input, expected) in cases {
        let mut stream = input.to_vec();
        stream.extend_from_slice(b" Tj");

        let result = ContentParser::parse_content(&stream);
        assert!(result.is_ok());

        let operations = result.unwrap();
        match &operations[0] {
            ContentOperation::ShowText(text) => {
                assert_eq!(text, expected, "Failed for input: {:?}", input);
            }
            _ => panic!("Expected ShowText operation"),
        }
    }
}
2770
/// Smoke test: a `BI … ID … EI` sequence with raw binary image data must
/// not make the parser panic.
///
/// Whether the result is `Ok` or `Err` is deliberately left unspecified.
/// The previous `assert!(is_ok() || is_err())` was a tautology that could
/// never fail, so it has been replaced by simply running the parser and
/// discarding the result.
#[test]
fn test_content_with_inline_images() {
    let content = b"BI /W 10 /H 10 /CS /RGB ID \x00\x01\x02\x03 EI";

    // The test fails only if this call panics; either outcome is accepted.
    let _outcome = ContentParser::parse_content(content);
}
2780
/// Smoke test: operators given with no operands at all must not make the
/// parser panic.
///
/// Whether each input yields `Ok` or `Err` is deliberately left
/// unspecified. The previous `assert!(is_ok() || is_err())` was a tautology
/// that could never fail, so it has been replaced by simply running the
/// parser on every case and discarding the result.
#[test]
fn test_operator_with_missing_operands() {
    // Each entry is a complete stream consisting of one bare operator.
    let test_cases = [b"Tj" as &[u8], b"m", b"rg", b"Tf"];

    for &content in test_cases.iter() {
        // The test fails only if this call panics; either outcome is accepted.
        let _outcome = ContentParser::parse_content(content);
    }
}
2797}