1use super::{ParseError, ParseResult};
57use crate::objects::Object;
58use std::collections::HashMap;
59
60#[derive(Debug, Clone, PartialEq)]
94pub enum ContentOperation {
95 BeginText,
99
100 EndText,
103
104 SetCharSpacing(f32),
108
109 SetWordSpacing(f32),
112
113 SetHorizontalScaling(f32),
116
117 SetLeading(f32),
120
121 SetFont(String, f32),
124
125 SetTextRenderMode(i32),
128
129 SetTextRise(f32),
132
133 MoveText(f32, f32),
137
138 MoveTextSetLeading(f32, f32),
141
142 SetTextMatrix(f32, f32, f32, f32, f32, f32),
145
146 NextLine,
149
150 ShowText(Vec<u8>),
154
155 ShowTextArray(Vec<TextElement>),
158
159 NextLineShowText(Vec<u8>),
162
163 SetSpacingNextLineShowText(f32, f32, Vec<u8>),
166
167 SaveGraphicsState,
171
172 RestoreGraphicsState,
175
176 SetTransformMatrix(f32, f32, f32, f32, f32, f32),
179
180 SetLineWidth(f32),
182
183 SetLineCap(i32),
186
187 SetLineJoin(i32),
190
191 SetMiterLimit(f32),
194
195 SetDashPattern(Vec<f32>, f32),
198
199 SetIntent(String),
202
203 SetFlatness(f32),
206
207 SetGraphicsStateParams(String),
210
211 MoveTo(f32, f32),
214
215 LineTo(f32, f32),
217
218 CurveTo(f32, f32, f32, f32, f32, f32),
221
222 CurveToV(f32, f32, f32, f32),
224
225 CurveToY(f32, f32, f32, f32),
227
228 ClosePath,
231
232 Rectangle(f32, f32, f32, f32),
235
236 Stroke,
239
240 CloseStroke,
243
244 Fill,
246
247 FillEvenOdd,
249
250 FillStroke,
253
254 FillStrokeEvenOdd,
256
257 CloseFillStroke,
260
261 CloseFillStrokeEvenOdd,
263
264 EndPath,
267
268 Clip, ClipEvenOdd, SetStrokingColorSpace(String),
276
277 SetNonStrokingColorSpace(String),
280
281 SetStrokingColor(Vec<f32>),
284
285 SetNonStrokingColor(Vec<f32>),
288
289 SetStrokingGray(f32),
292
293 SetNonStrokingGray(f32),
295
296 SetStrokingRGB(f32, f32, f32),
299
300 SetNonStrokingRGB(f32, f32, f32),
302
303 SetStrokingCMYK(f32, f32, f32, f32),
305
306 SetNonStrokingCMYK(f32, f32, f32, f32),
308
309 ShadingFill(String), BeginInlineImage,
315 InlineImage {
317 params: HashMap<String, Object>,
319 data: Vec<u8>,
321 },
322
323 PaintXObject(String),
327
328 BeginMarkedContent(String), BeginMarkedContentWithProps(String, HashMap<String, String>), EndMarkedContent, DefineMarkedContentPoint(String), DefineMarkedContentPointWithProps(String, HashMap<String, String>), BeginCompatibility, EndCompatibility, }
339
/// One element of the array operand of a `TJ` (show text array) operator.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// A string to show, as raw (undecoded) bytes.
    Text(Vec<u8>),

    /// A numeric element of the array (a position adjustment).
    Spacing(f32),
}
366
367#[derive(Debug, Clone, PartialEq)]
369pub(super) enum Token {
370 Number(f32),
371 Integer(i32),
372 String(Vec<u8>),
373 HexString(Vec<u8>),
374 Name(String),
375 Operator(String),
376 ArrayStart,
377 ArrayEnd,
378 DictStart,
379 DictEnd,
380}
381
/// Splits the raw bytes of a content stream into tokens.
pub struct ContentTokenizer<'a> {
    /// The bytes being tokenized.
    input: &'a [u8],

    /// Index into `input` of the next unread byte.
    position: usize,
}
387
388impl<'a> ContentTokenizer<'a> {
389 pub fn new(input: &'a [u8]) -> Self {
391 Self { input, position: 0 }
392 }
393
394 pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
396 self.skip_whitespace();
397
398 if self.position >= self.input.len() {
399 return Ok(None);
400 }
401
402 let ch = self.input[self.position];
403
404 match ch {
405 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
407
408 b'(' => self.read_literal_string(),
410 b'<' => {
411 if self.peek_next() == Some(b'<') {
412 self.position += 2;
413 Ok(Some(Token::DictStart))
414 } else {
415 self.read_hex_string()
416 }
417 }
418 b'>' => {
419 if self.peek_next() == Some(b'>') {
420 self.position += 2;
421 Ok(Some(Token::DictEnd))
422 } else {
423 Err(ParseError::SyntaxError {
424 position: self.position,
425 message: "Unexpected '>'".to_string(),
426 })
427 }
428 }
429
430 b'[' => {
432 self.position += 1;
433 Ok(Some(Token::ArrayStart))
434 }
435 b']' => {
436 self.position += 1;
437 Ok(Some(Token::ArrayEnd))
438 }
439
440 b'/' => self.read_name(),
442
443 _ => self.read_operator(),
445 }
446 }
447
448 fn skip_whitespace(&mut self) {
449 while self.position < self.input.len() {
450 match self.input[self.position] {
451 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
452 b'%' => self.skip_comment(),
453 _ => break,
454 }
455 }
456 }
457
458 fn skip_comment(&mut self) {
459 while self.position < self.input.len() && self.input[self.position] != b'\n' {
460 self.position += 1;
461 }
462 }
463
464 fn peek_next(&self) -> Option<u8> {
465 if self.position + 1 < self.input.len() {
466 Some(self.input[self.position + 1])
467 } else {
468 None
469 }
470 }
471
472 fn read_number(&mut self) -> ParseResult<Option<Token>> {
473 let start = self.position;
474 let mut has_dot = false;
475
476 if self.position < self.input.len()
478 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
479 {
480 self.position += 1;
481 }
482
483 while self.position < self.input.len() {
485 match self.input[self.position] {
486 b'0'..=b'9' => self.position += 1,
487 b'.' if !has_dot => {
488 has_dot = true;
489 self.position += 1;
490 }
491 _ => break,
492 }
493 }
494
495 let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
496 ParseError::SyntaxError {
497 position: start,
498 message: "Invalid number format".to_string(),
499 }
500 })?;
501
502 if has_dot {
503 let value = num_str
504 .parse::<f32>()
505 .map_err(|_| ParseError::SyntaxError {
506 position: start,
507 message: "Invalid float number".to_string(),
508 })?;
509 Ok(Some(Token::Number(value)))
510 } else {
511 let value = num_str
512 .parse::<i32>()
513 .map_err(|_| ParseError::SyntaxError {
514 position: start,
515 message: "Invalid integer number".to_string(),
516 })?;
517 Ok(Some(Token::Integer(value)))
518 }
519 }
520
521 fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
522 self.position += 1; let mut result = Vec::new();
524 let mut paren_depth = 1;
525 let mut escape = false;
526
527 while self.position < self.input.len() && paren_depth > 0 {
528 let ch = self.input[self.position];
529 self.position += 1;
530
531 if escape {
532 match ch {
533 b'n' => result.push(b'\n'),
534 b'r' => result.push(b'\r'),
535 b't' => result.push(b'\t'),
536 b'b' => result.push(b'\x08'),
537 b'f' => result.push(b'\x0C'),
538 b'(' => result.push(b'('),
539 b')' => result.push(b')'),
540 b'\\' => result.push(b'\\'),
541 b'0'..=b'7' => {
542 self.position -= 1;
544 let octal_value = self.read_octal_escape()?;
545 result.push(octal_value);
546 }
547 _ => result.push(ch), }
549 escape = false;
550 } else {
551 match ch {
552 b'\\' => escape = true,
553 b'(' => {
554 paren_depth += 1;
555 result.push(ch);
556 }
557 b')' => {
558 paren_depth -= 1;
559 if paren_depth > 0 {
560 result.push(ch);
561 }
562 }
563 _ => result.push(ch),
564 }
565 }
566 }
567
568 Ok(Some(Token::String(result)))
569 }
570
571 fn read_octal_escape(&mut self) -> ParseResult<u8> {
572 let mut value = 0u8;
573 let mut count = 0;
574
575 while count < 3 && self.position < self.input.len() {
576 match self.input[self.position] {
577 b'0'..=b'7' => {
578 value = value * 8 + (self.input[self.position] - b'0');
579 self.position += 1;
580 count += 1;
581 }
582 _ => break,
583 }
584 }
585
586 Ok(value)
587 }
588
589 fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
590 self.position += 1; let mut result = Vec::new();
592 let mut nibble = None;
593
594 while self.position < self.input.len() {
595 let ch = self.input[self.position];
596
597 match ch {
598 b'>' => {
599 self.position += 1;
600 if let Some(n) = nibble {
602 result.push(n << 4);
603 }
604 return Ok(Some(Token::HexString(result)));
605 }
606 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
607 let digit = if ch <= b'9' {
608 ch - b'0'
609 } else if ch <= b'F' {
610 ch - b'A' + 10
611 } else {
612 ch - b'a' + 10
613 };
614
615 if let Some(n) = nibble {
616 result.push((n << 4) | digit);
617 nibble = None;
618 } else {
619 nibble = Some(digit);
620 }
621 self.position += 1;
622 }
623 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
624 self.position += 1;
626 }
627 _ => {
628 return Err(ParseError::SyntaxError {
629 position: self.position,
630 message: format!("Invalid character in hex string: {:?}", ch as char),
631 });
632 }
633 }
634 }
635
636 Err(ParseError::SyntaxError {
637 position: self.position,
638 message: "Unterminated hex string".to_string(),
639 })
640 }
641
642 fn read_name(&mut self) -> ParseResult<Option<Token>> {
643 self.position += 1; let start = self.position;
645
646 while self.position < self.input.len() {
647 let ch = self.input[self.position];
648 match ch {
649 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
650 | b']' | b'{' | b'}' | b'/' | b'%' => break,
651 b'#' => {
652 self.position += 1;
654 if self.position + 1 < self.input.len() {
655 self.position += 2;
656 }
657 }
658 _ => self.position += 1,
659 }
660 }
661
662 let name_bytes = &self.input[start..self.position];
663 let name = self.decode_name(name_bytes)?;
664 Ok(Some(Token::Name(name)))
665 }
666
667 fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
668 let mut result = Vec::new();
669 let mut i = 0;
670
671 while i < bytes.len() {
672 if bytes[i] == b'#' && i + 2 < bytes.len() {
673 let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
675 ParseError::SyntaxError {
676 position: self.position,
677 message: "Invalid hex escape in name".to_string(),
678 }
679 })?;
680 let value =
681 u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
682 position: self.position,
683 message: "Invalid hex escape in name".to_string(),
684 })?;
685 result.push(value);
686 i += 3;
687 } else {
688 result.push(bytes[i]);
689 i += 1;
690 }
691 }
692
693 String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
694 position: self.position,
695 message: "Invalid UTF-8 in name".to_string(),
696 })
697 }
698
699 fn read_operator(&mut self) -> ParseResult<Option<Token>> {
700 let start = self.position;
701
702 while self.position < self.input.len() {
703 let ch = self.input[self.position];
704 match ch {
705 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
706 | b']' | b'{' | b'}' | b'/' | b'%' => break,
707 _ => self.position += 1,
708 }
709 }
710
711 let op_bytes = &self.input[start..self.position];
712 let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
713 position: start,
714 message: "Invalid operator".to_string(),
715 })?;
716
717 Ok(Some(Token::Operator(op.to_string())))
718 }
719}
720
721pub struct ContentParser {
740 tokens: Vec<Token>,
741 position: usize,
742}
743
744impl ContentParser {
745 pub fn new(_content: &[u8]) -> Self {
747 Self {
748 tokens: Vec::new(),
749 position: 0,
750 }
751 }
752
753 pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
787 Self::parse_content(content)
788 }
789
790 pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
795 let mut tokenizer = ContentTokenizer::new(content);
796 let mut tokens = Vec::new();
797
798 while let Some(token) = tokenizer.next_token()? {
800 tokens.push(token);
801 }
802
803 let mut parser = Self {
804 tokens,
805 position: 0,
806 };
807
808 parser.parse_operators()
809 }
810
811 fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
812 let mut operators = Vec::new();
813 let mut operand_stack: Vec<Token> = Vec::new();
814
815 while self.position < self.tokens.len() {
816 let token = self.tokens[self.position].clone();
817 self.position += 1;
818
819 match &token {
820 Token::Operator(op) => {
821 let operator = self.parse_operator(op, &mut operand_stack)?;
822 operators.push(operator);
823 }
824 _ => {
825 operand_stack.push(token);
827 }
828 }
829 }
830
831 Ok(operators)
832 }
833
834 fn parse_operator(
835 &mut self,
836 op: &str,
837 operands: &mut Vec<Token>,
838 ) -> ParseResult<ContentOperation> {
839 let operator = match op {
840 "BT" => ContentOperation::BeginText,
842 "ET" => ContentOperation::EndText,
843
844 "Tc" => {
846 let spacing = self.pop_number(operands)?;
847 ContentOperation::SetCharSpacing(spacing)
848 }
849 "Tw" => {
850 let spacing = self.pop_number(operands)?;
851 ContentOperation::SetWordSpacing(spacing)
852 }
853 "Tz" => {
854 let scale = self.pop_number(operands)?;
855 ContentOperation::SetHorizontalScaling(scale)
856 }
857 "TL" => {
858 let leading = self.pop_number(operands)?;
859 ContentOperation::SetLeading(leading)
860 }
861 "Tf" => {
862 let size = self.pop_number(operands)?;
863 let font = self.pop_name(operands)?;
864 ContentOperation::SetFont(font, size)
865 }
866 "Tr" => {
867 let mode = self.pop_integer(operands)?;
868 ContentOperation::SetTextRenderMode(mode)
869 }
870 "Ts" => {
871 let rise = self.pop_number(operands)?;
872 ContentOperation::SetTextRise(rise)
873 }
874
875 "Td" => {
877 let ty = self.pop_number(operands)?;
878 let tx = self.pop_number(operands)?;
879 ContentOperation::MoveText(tx, ty)
880 }
881 "TD" => {
882 let ty = self.pop_number(operands)?;
883 let tx = self.pop_number(operands)?;
884 ContentOperation::MoveTextSetLeading(tx, ty)
885 }
886 "Tm" => {
887 let f = self.pop_number(operands)?;
888 let e = self.pop_number(operands)?;
889 let d = self.pop_number(operands)?;
890 let c = self.pop_number(operands)?;
891 let b = self.pop_number(operands)?;
892 let a = self.pop_number(operands)?;
893 ContentOperation::SetTextMatrix(a, b, c, d, e, f)
894 }
895 "T*" => ContentOperation::NextLine,
896
897 "Tj" => {
899 let text = self.pop_string(operands)?;
900 ContentOperation::ShowText(text)
901 }
902 "TJ" => {
903 let array = self.pop_array(operands)?;
904 let elements = self.parse_text_array(array)?;
905 ContentOperation::ShowTextArray(elements)
906 }
907 "'" => {
908 let text = self.pop_string(operands)?;
909 ContentOperation::NextLineShowText(text)
910 }
911 "\"" => {
912 let text = self.pop_string(operands)?;
913 let aw = self.pop_number(operands)?;
914 let ac = self.pop_number(operands)?;
915 ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
916 }
917
918 "q" => ContentOperation::SaveGraphicsState,
920 "Q" => ContentOperation::RestoreGraphicsState,
921 "cm" => {
922 let f = self.pop_number(operands)?;
923 let e = self.pop_number(operands)?;
924 let d = self.pop_number(operands)?;
925 let c = self.pop_number(operands)?;
926 let b = self.pop_number(operands)?;
927 let a = self.pop_number(operands)?;
928 ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
929 }
930 "w" => {
931 let width = self.pop_number(operands)?;
932 ContentOperation::SetLineWidth(width)
933 }
934 "J" => {
935 let cap = self.pop_integer(operands)?;
936 ContentOperation::SetLineCap(cap)
937 }
938 "j" => {
939 let join = self.pop_integer(operands)?;
940 ContentOperation::SetLineJoin(join)
941 }
942 "M" => {
943 let limit = self.pop_number(operands)?;
944 ContentOperation::SetMiterLimit(limit)
945 }
946 "d" => {
947 let phase = self.pop_number(operands)?;
948 let array = self.pop_array(operands)?;
949 let pattern = self.parse_dash_array(array)?;
950 ContentOperation::SetDashPattern(pattern, phase)
951 }
952 "ri" => {
953 let intent = self.pop_name(operands)?;
954 ContentOperation::SetIntent(intent)
955 }
956 "i" => {
957 let flatness = self.pop_number(operands)?;
958 ContentOperation::SetFlatness(flatness)
959 }
960 "gs" => {
961 let name = self.pop_name(operands)?;
962 ContentOperation::SetGraphicsStateParams(name)
963 }
964
965 "m" => {
967 let y = self.pop_number(operands)?;
968 let x = self.pop_number(operands)?;
969 ContentOperation::MoveTo(x, y)
970 }
971 "l" => {
972 let y = self.pop_number(operands)?;
973 let x = self.pop_number(operands)?;
974 ContentOperation::LineTo(x, y)
975 }
976 "c" => {
977 let y3 = self.pop_number(operands)?;
978 let x3 = self.pop_number(operands)?;
979 let y2 = self.pop_number(operands)?;
980 let x2 = self.pop_number(operands)?;
981 let y1 = self.pop_number(operands)?;
982 let x1 = self.pop_number(operands)?;
983 ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
984 }
985 "v" => {
986 let y3 = self.pop_number(operands)?;
987 let x3 = self.pop_number(operands)?;
988 let y2 = self.pop_number(operands)?;
989 let x2 = self.pop_number(operands)?;
990 ContentOperation::CurveToV(x2, y2, x3, y3)
991 }
992 "y" => {
993 let y3 = self.pop_number(operands)?;
994 let x3 = self.pop_number(operands)?;
995 let y1 = self.pop_number(operands)?;
996 let x1 = self.pop_number(operands)?;
997 ContentOperation::CurveToY(x1, y1, x3, y3)
998 }
999 "h" => ContentOperation::ClosePath,
1000 "re" => {
1001 let height = self.pop_number(operands)?;
1002 let width = self.pop_number(operands)?;
1003 let y = self.pop_number(operands)?;
1004 let x = self.pop_number(operands)?;
1005 ContentOperation::Rectangle(x, y, width, height)
1006 }
1007
1008 "S" => ContentOperation::Stroke,
1010 "s" => ContentOperation::CloseStroke,
1011 "f" | "F" => ContentOperation::Fill,
1012 "f*" => ContentOperation::FillEvenOdd,
1013 "B" => ContentOperation::FillStroke,
1014 "B*" => ContentOperation::FillStrokeEvenOdd,
1015 "b" => ContentOperation::CloseFillStroke,
1016 "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1017 "n" => ContentOperation::EndPath,
1018
1019 "W" => ContentOperation::Clip,
1021 "W*" => ContentOperation::ClipEvenOdd,
1022
1023 "CS" => {
1025 let name = self.pop_name(operands)?;
1026 ContentOperation::SetStrokingColorSpace(name)
1027 }
1028 "cs" => {
1029 let name = self.pop_name(operands)?;
1030 ContentOperation::SetNonStrokingColorSpace(name)
1031 }
1032 "SC" | "SCN" => {
1033 let components = self.pop_color_components(operands)?;
1034 ContentOperation::SetStrokingColor(components)
1035 }
1036 "sc" | "scn" => {
1037 let components = self.pop_color_components(operands)?;
1038 ContentOperation::SetNonStrokingColor(components)
1039 }
1040 "G" => {
1041 let gray = self.pop_number(operands)?;
1042 ContentOperation::SetStrokingGray(gray)
1043 }
1044 "g" => {
1045 let gray = self.pop_number(operands)?;
1046 ContentOperation::SetNonStrokingGray(gray)
1047 }
1048 "RG" => {
1049 let b = self.pop_number(operands)?;
1050 let g = self.pop_number(operands)?;
1051 let r = self.pop_number(operands)?;
1052 ContentOperation::SetStrokingRGB(r, g, b)
1053 }
1054 "rg" => {
1055 let b = self.pop_number(operands)?;
1056 let g = self.pop_number(operands)?;
1057 let r = self.pop_number(operands)?;
1058 ContentOperation::SetNonStrokingRGB(r, g, b)
1059 }
1060 "K" => {
1061 let k = self.pop_number(operands)?;
1062 let y = self.pop_number(operands)?;
1063 let m = self.pop_number(operands)?;
1064 let c = self.pop_number(operands)?;
1065 ContentOperation::SetStrokingCMYK(c, m, y, k)
1066 }
1067 "k" => {
1068 let k = self.pop_number(operands)?;
1069 let y = self.pop_number(operands)?;
1070 let m = self.pop_number(operands)?;
1071 let c = self.pop_number(operands)?;
1072 ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1073 }
1074
1075 "sh" => {
1077 let name = self.pop_name(operands)?;
1078 ContentOperation::ShadingFill(name)
1079 }
1080
1081 "Do" => {
1083 let name = self.pop_name(operands)?;
1084 ContentOperation::PaintXObject(name)
1085 }
1086
1087 "BMC" => {
1089 let tag = self.pop_name(operands)?;
1090 ContentOperation::BeginMarkedContent(tag)
1091 }
1092 "BDC" => {
1093 let props = self.pop_dict_or_name(operands)?;
1094 let tag = self.pop_name(operands)?;
1095 ContentOperation::BeginMarkedContentWithProps(tag, props)
1096 }
1097 "EMC" => ContentOperation::EndMarkedContent,
1098 "MP" => {
1099 let tag = self.pop_name(operands)?;
1100 ContentOperation::DefineMarkedContentPoint(tag)
1101 }
1102 "DP" => {
1103 let props = self.pop_dict_or_name(operands)?;
1104 let tag = self.pop_name(operands)?;
1105 ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1106 }
1107
1108 "BX" => ContentOperation::BeginCompatibility,
1110 "EX" => ContentOperation::EndCompatibility,
1111
1112 "BI" => {
1114 operands.clear(); self.parse_inline_image()?
1116 }
1117
1118 _ => {
1119 return Err(ParseError::SyntaxError {
1120 position: self.position,
1121 message: format!("Unknown operator: {op}"),
1122 });
1123 }
1124 };
1125
1126 operands.clear(); Ok(operator)
1128 }
1129
1130 fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1132 match operands.pop() {
1133 Some(Token::Number(n)) => Ok(n),
1134 Some(Token::Integer(i)) => Ok(i as f32),
1135 _ => Err(ParseError::SyntaxError {
1136 position: self.position,
1137 message: "Expected number operand".to_string(),
1138 }),
1139 }
1140 }
1141
1142 fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1143 match operands.pop() {
1144 Some(Token::Integer(i)) => Ok(i),
1145 _ => Err(ParseError::SyntaxError {
1146 position: self.position,
1147 message: "Expected integer operand".to_string(),
1148 }),
1149 }
1150 }
1151
1152 fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1153 match operands.pop() {
1154 Some(Token::Name(n)) => Ok(n),
1155 _ => Err(ParseError::SyntaxError {
1156 position: self.position,
1157 message: "Expected name operand".to_string(),
1158 }),
1159 }
1160 }
1161
1162 fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1163 match operands.pop() {
1164 Some(Token::String(s)) => Ok(s),
1165 Some(Token::HexString(s)) => Ok(s),
1166 _ => Err(ParseError::SyntaxError {
1167 position: self.position,
1168 message: "Expected string operand".to_string(),
1169 }),
1170 }
1171 }
1172
1173 fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1174 let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
1176 if has_array_end {
1177 operands.pop(); }
1179
1180 let mut array = Vec::new();
1181 let mut found_start = false;
1182
1183 while let Some(token) = operands.pop() {
1185 match token {
1186 Token::ArrayStart => {
1187 found_start = true;
1188 break;
1189 }
1190 Token::ArrayEnd => {
1191 continue;
1193 }
1194 _ => array.push(token),
1195 }
1196 }
1197
1198 if !found_start {
1199 return Err(ParseError::SyntaxError {
1200 position: self.position,
1201 message: "Expected array".to_string(),
1202 });
1203 }
1204
1205 array.reverse(); Ok(array)
1207 }
1208
1209 fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1210 if let Some(token) = operands.pop() {
1211 match token {
1212 Token::Name(name) => {
1213 let mut props = HashMap::new();
1216 props.insert("__resource_ref".to_string(), name);
1217 Ok(props)
1218 }
1219 Token::DictStart => {
1220 let mut props = HashMap::new();
1222
1223 while let Some(value_token) = operands.pop() {
1225 if matches!(value_token, Token::DictEnd) {
1226 break;
1227 }
1228
1229 if let Token::Name(key) = value_token {
1231 if let Some(value_token) = operands.pop() {
1232 let value = match value_token {
1233 Token::Name(name) => name,
1234 Token::String(s) => String::from_utf8_lossy(&s).to_string(),
1235 Token::Integer(i) => i.to_string(),
1236 Token::Number(f) => f.to_string(),
1237 _ => continue, };
1239 props.insert(key, value);
1240 }
1241 }
1242 }
1243
1244 Ok(props)
1245 }
1246 _ => {
1247 Ok(HashMap::new())
1249 }
1250 }
1251 } else {
1252 Err(ParseError::SyntaxError {
1254 position: 0,
1255 message: "Expected dictionary or name for marked content properties".to_string(),
1256 })
1257 }
1258 }
1259
1260 fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1261 let mut components = Vec::new();
1262
1263 while let Some(token) = operands.last() {
1265 match token {
1266 Token::Number(n) => {
1267 components.push(*n);
1268 operands.pop();
1269 }
1270 Token::Integer(i) => {
1271 components.push(*i as f32);
1272 operands.pop();
1273 }
1274 _ => break,
1275 }
1276 }
1277
1278 components.reverse();
1279 Ok(components)
1280 }
1281
1282 fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1283 let mut elements = Vec::new();
1284
1285 for token in tokens {
1286 match token {
1287 Token::String(s) | Token::HexString(s) => {
1288 elements.push(TextElement::Text(s));
1289 }
1290 Token::Number(n) => {
1291 elements.push(TextElement::Spacing(n));
1292 }
1293 Token::Integer(i) => {
1294 elements.push(TextElement::Spacing(i as f32));
1295 }
1296 _ => {
1297 return Err(ParseError::SyntaxError {
1298 position: self.position,
1299 message: "Invalid element in text array".to_string(),
1300 });
1301 }
1302 }
1303 }
1304
1305 Ok(elements)
1306 }
1307
1308 fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1309 let mut pattern = Vec::new();
1310
1311 for token in tokens {
1312 match token {
1313 Token::Number(n) => pattern.push(n),
1314 Token::Integer(i) => pattern.push(i as f32),
1315 _ => {
1316 return Err(ParseError::SyntaxError {
1317 position: self.position,
1318 message: "Invalid element in dash array".to_string(),
1319 });
1320 }
1321 }
1322 }
1323
1324 Ok(pattern)
1325 }
1326
1327 fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1328 let mut params = HashMap::new();
1330
1331 while self.position < self.tokens.len() {
1332 if let Token::Operator(op) = &self.tokens[self.position] {
1334 if op == "ID" {
1335 self.position += 1;
1336 break;
1337 }
1338 }
1339
1340 if let Token::Name(key) = &self.tokens[self.position] {
1345 self.position += 1;
1346 if self.position >= self.tokens.len() {
1347 break;
1348 }
1349
1350 let value = match &self.tokens[self.position] {
1352 Token::Integer(n) => Object::Integer(*n as i64),
1353 Token::Number(n) => Object::Real(*n as f64),
1354 Token::Name(s) => Object::Name(expand_inline_name(s)),
1355 Token::String(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1356 Token::HexString(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1357 _ => Object::Null,
1358 };
1359
1360 let full_key = expand_inline_key(key);
1362 params.insert(full_key, value);
1363 self.position += 1;
1364 } else {
1365 self.position += 1;
1366 }
1367 }
1368
1369 let mut data = Vec::new();
1372
1373 while self.position < self.tokens.len() {
1379 if let Token::Operator(op) = &self.tokens[self.position] {
1380 if op == "EI" {
1381 self.position += 1;
1382 break;
1383 }
1384 }
1385
1386 match &self.tokens[self.position] {
1388 Token::String(bytes) => data.extend_from_slice(bytes),
1389 Token::HexString(bytes) => data.extend_from_slice(bytes),
1390 Token::Integer(n) => data.extend_from_slice(n.to_string().as_bytes()),
1391 Token::Number(n) => data.extend_from_slice(n.to_string().as_bytes()),
1392 Token::Name(s) => data.extend_from_slice(s.as_bytes()),
1393 Token::Operator(s) if s != "EI" => data.extend_from_slice(s.as_bytes()),
1394 _ => {}
1395 }
1396 self.position += 1;
1397 }
1398
1399 Ok(ContentOperation::InlineImage { params, data })
1400 }
1401}
1402
/// Expands an abbreviated inline-image dictionary key to its full name.
///
/// Keys that have no abbreviation entry, including keys that are already in
/// their full form, are returned unchanged.
fn expand_inline_key(key: &str) -> String {
    let expanded = match key {
        "W" => "Width",
        "H" => "Height",
        "CS" => "ColorSpace",
        "BPC" => "BitsPerComponent",
        "F" => "Filter",
        "DP" => "DecodeParms",
        "IM" => "ImageMask",
        "I" => "Interpolate",
        "D" => "Decode",
        // Full names ("ColorSpace", "Intent", ...) and unknown keys.
        other => other,
    };
    expanded.to_string()
}
1419
/// Expands an abbreviated inline-image name value (color space or filter)
/// to its full name.
///
/// Names that have no abbreviation entry are returned unchanged.
fn expand_inline_name(name: &str) -> String {
    let expanded = match name {
        // Color spaces
        "G" => "DeviceGray",
        "RGB" => "DeviceRGB",
        "CMYK" => "DeviceCMYK",
        "I" => "Indexed",
        // Filters
        "AHx" => "ASCIIHexDecode",
        "A85" => "ASCII85Decode",
        "LZW" => "LZWDecode",
        "Fl" => "FlateDecode",
        "RL" => "RunLengthDecode",
        "DCT" => "DCTDecode",
        "CCF" => "CCITTFaxDecode",
        other => other,
    };
    expanded.to_string()
}
1437
1438#[cfg(test)]
1439mod tests {
1440 use super::*;
1441
#[test]
fn test_tokenize_numbers() {
    // Integers, signed values, and floats with and without a leading digit.
    let mut tokenizer = ContentTokenizer::new(b"123 -45 3.14159 -0.5 .5");
    let expected = [
        Token::Integer(123),
        Token::Integer(-45),
        Token::Number(3.14159),
        Token::Number(-0.5),
        Token::Number(0.5),
    ];

    for want in &expected {
        assert_eq!(tokenizer.next_token().unwrap().as_ref(), Some(want));
    }

    // Nothing may remain after the last number.
    assert_eq!(tokenizer.next_token().unwrap(), None);
}
1457
#[test]
fn test_tokenize_strings() {
    // A plain string, an escaped newline, and balanced nested parentheses.
    let mut tokenizer =
        ContentTokenizer::new(b"(Hello World) (Hello\\nWorld) (Nested (paren))");
    let expected: [&[u8]; 3] = [b"Hello World", b"Hello\nWorld", b"Nested (paren)"];

    for bytes in &expected {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(bytes.to_vec()))
        );
    }
}
1476
#[test]
fn test_tokenize_hex_strings() {
    // The same bytes written without and with white space between digits.
    let mut tokenizer = ContentTokenizer::new(b"<48656C6C6F> <48 65 6C 6C 6F>");
    let hello = Token::HexString(b"Hello".to_vec());

    for _ in 0..2 {
        assert_eq!(tokenizer.next_token().unwrap().as_ref(), Some(&hello));
    }
}
1491
#[test]
fn test_tokenize_names() {
    // A plain name and two names that use `#xx` hex escapes.
    let mut tokenizer = ContentTokenizer::new(b"/Name /Name#20with#20spaces /A#42C");
    let expected = ["Name", "Name with spaces", "ABC"];

    for name in &expected {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Name(name.to_string()))
        );
    }
}
1510
#[test]
fn test_tokenize_operators() {
    let mut tokenizer = ContentTokenizer::new(b"BT Tj ET q Q");
    let expected = ["BT", "Tj", "ET", "q", "Q"];

    // Each keyword must come back as an operator token, in order.
    for op in &expected {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Operator(op.to_string()))
        );
    }
}
1537
#[test]
fn test_parse_text_operators() {
    let operators =
        ContentParser::parse(b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET").unwrap();

    // Comparing whole vectors checks both the count and every element.
    let expected = vec![
        ContentOperation::BeginText,
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::MoveText(100.0, 200.0),
        ContentOperation::ShowText(b"Hello World".to_vec()),
        ContentOperation::EndText,
    ];
    assert_eq!(operators, expected);
}
1556
#[test]
fn test_parse_graphics_operators() {
    let operators = ContentParser::parse(b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q").unwrap();

    // Comparing whole vectors checks both the count and every element.
    let expected = vec![
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
        ContentOperation::SetLineWidth(2.0),
        ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0),
        ContentOperation::Stroke,
        ContentOperation::RestoreGraphicsState,
    ];
    assert_eq!(operators, expected);
}
1576
#[test]
fn test_parse_color_operators() {
    let operators = ContentParser::parse(b"0.5 g 1 0 0 rg 0 0 0 1 k").unwrap();

    // One gray, one RGB and one CMYK non-stroking color, in that order.
    let expected = vec![
        ContentOperation::SetNonStrokingGray(0.5),
        ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0),
    ];
    assert_eq!(operators, expected);
}
1593
1594 mod comprehensive_tests {
1596 use super::*;
1597
#[test]
fn test_all_text_operators() {
    // One text object exercising each text-state, positioning and showing operator.
    let stream = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::BeginText,
        ContentOperation::SetCharSpacing(5.0),
        ContentOperation::SetWordSpacing(10.0),
        ContentOperation::SetHorizontalScaling(120.0),
        ContentOperation::SetLeading(15.0),
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::SetTextRenderMode(1),
        ContentOperation::SetTextRise(5.0),
        ContentOperation::MoveText(100.0, 200.0),
        ContentOperation::MoveTextSetLeading(50.0, 150.0),
        ContentOperation::NextLine,
        ContentOperation::ShowText(b"Hello".to_vec()),
        ContentOperation::EndText,
    ];
    // Checked position by position; the total operation count is not asserted.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want, "operation at index {}", idx);
    }
}
1624
#[test]
fn test_all_graphics_state_operators() {
    // Each graphics-state operator appears once, in the order asserted below.
    let stream = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
        ContentOperation::SetLineWidth(2.0),
        ContentOperation::SetLineCap(1),
        ContentOperation::SetLineJoin(2),
        ContentOperation::SetMiterLimit(10.0),
        ContentOperation::SetGraphicsStateParams("GS1".to_string()),
        ContentOperation::SetFlatness(0.5),
        ContentOperation::SetIntent("Perceptual".to_string()),
    ];
    // Checked position by position; the total operation count is not asserted.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want, "operation at index {}", idx);
    }
}
1651
#[test]
fn test_all_path_construction_operators() {
    // m, l, c, v, y, h and re, each with its full operand list.
    let stream = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0),
        ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0),
        ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0),
        ContentOperation::ClosePath,
        ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0),
    ];
    // Checked position by position; the total operation count is not asserted.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want, "operation at index {}", idx);
    }
}
1677
#[test]
fn test_all_path_painting_operators() {
    // Operand-free painting and clipping operators.
    let stream = b"S s f F f* B B* b b* n W W*";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        // `F` is expected to yield the same operation as `f`.
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
        ContentOperation::EndPath,
        ContentOperation::Clip,
        ContentOperation::ClipEvenOdd,
    ];
    // Checked position by position; the total operation count is not asserted.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want, "operation at index {}", idx);
    }
}
1696
#[test]
fn test_all_color_operators() {
    // Colour-space selection, gray/RGB/CMYK setters (stroking and non-stroking)
    // and a shading fill.
    let stream = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string()),
        ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string()),
        ContentOperation::SetStrokingGray(0.7),
        ContentOperation::SetNonStrokingGray(0.4),
        ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0),
        ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0),
        ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5),
        ContentOperation::ShadingFill("Shade1".to_string()),
    ];
    // Checked position by position; the total operation count is not asserted.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want, "operation at index {}", idx);
    }
}
1734
#[test]
fn test_xobject_and_marked_content_operators() {
    // XObject painting, marked-content and compatibility-section operators.
    let stream = b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::PaintXObject("Image1".to_string()),
        ContentOperation::BeginMarkedContent("MC1".to_string()),
        ContentOperation::EndMarkedContent,
        ContentOperation::DefineMarkedContentPoint("MP1".to_string()),
        ContentOperation::BeginCompatibility,
        ContentOperation::EndCompatibility,
    ];
    // Checked position by position; the total operation count is not asserted.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want, "operation at index {}", idx);
    }
}
1757
#[test]
fn test_complex_content_stream() {
    // A text object nested inside a saved, scaled graphics state.
    let stream = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = vec![
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0),
        ContentOperation::BeginText,
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::MoveText(0.0, 0.0),
        ContentOperation::ShowText(b"Complex".to_vec()),
        ContentOperation::EndText,
        ContentOperation::RestoreGraphicsState,
    ];
    assert_eq!(parsed, expected);
}
1782
#[test]
fn test_tokenizer_whitespace_handling() {
    // Tokens separated by mixed spaces, tabs, CR and LF.
    let mut lexer = ContentTokenizer::new(b" \t\n\r BT \t\n /F1 12.5 \t Tf \n\r ET ");

    let expected = vec![
        Token::Operator("BT".to_string()),
        Token::Name("F1".to_string()),
        Token::Number(12.5),
        Token::Operator("Tf".to_string()),
        Token::Operator("ET".to_string()),
    ];
    for want in expected {
        assert_eq!(lexer.next_token().unwrap(), Some(want));
    }

    // Trailing whitespace must not produce another token.
    assert_eq!(lexer.next_token().unwrap(), None);
}
1807
#[test]
fn test_tokenizer_edge_cases() {
    // Numeric spellings: bare zero, leading/trailing dot, explicit sign.
    let mut lexer = ContentTokenizer::new(b"0 .5 -.5 +.5 123. .123 1.23 -1.23");

    let expected = vec![
        Token::Integer(0),
        Token::Number(0.5),
        Token::Number(-0.5),
        Token::Number(0.5),
        Token::Number(123.0),
        Token::Number(0.123),
        Token::Number(1.23),
        Token::Number(-1.23),
    ];
    for want in expected {
        assert_eq!(lexer.next_token().unwrap(), Some(want));
    }
}
1823
#[test]
fn test_string_parsing_edge_cases() {
    // Literal strings containing backslash escapes.
    let source = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
    let mut lexer = ContentTokenizer::new(source);

    // Decoded payloads for the first ten strings; the two trailing octal
    // strings in the input are not read by this test.
    let expected: [&[u8]; 10] = [
        b"Simple",
        b"With\\backslash",
        b"With)paren",
        b"With\newline",
        b"With\ttab",
        b"With\rcarriage",
        b"With\x08backspace",
        b"With\x0Cformfeed",
        b"With(leftparen",
        b"With)rightparen",
    ];
    for want in expected.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::String(want.to_vec()))
        );
    }
}
1870
#[test]
fn test_hex_string_parsing() {
    // Compact, whitespace-separated, longer, and odd-digit hex strings.
    let source = b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>";
    let mut lexer = ContentTokenizer::new(source);

    // The last entry expects the dangling `5` to decode as 0x50.
    let expected: [&[u8]; 4] = [b"Hello", b"Hello", b"HelloW", b"Hello\x50"];
    for want in expected.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::HexString(want.to_vec()))
        );
    }
}
1893
#[test]
fn test_name_parsing_edge_cases() {
    // Names using `#xx` hex escapes for space, `#`, `/` and a plain letter.
    let source = b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
    let mut lexer = ContentTokenizer::new(source);

    let expected = [
        "Name",
        "Name with spaces",
        "Name#with#hash",
        "Name/with/slash",
        "EmptyName",
    ];
    for want in expected.iter() {
        assert_eq!(
            lexer.next_token().unwrap(),
            Some(Token::Name((*want).to_string()))
        );
    }
}
1920
#[test]
fn test_operator_parsing_edge_cases() {
    // Repeated operators: each occurrence must yield its own operation.
    let stream = b"q q q Q Q Q BT BT ET ET";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = vec![
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::BeginText,
        ContentOperation::BeginText,
        ContentOperation::EndText,
        ContentOperation::EndText,
    ];
    assert_eq!(parsed, expected);
}
1938
#[test]
fn test_error_handling_insufficient_operands() {
    // `Td` is given a single operand here; parsing is expected to fail.
    let outcome = ContentParser::parse(b"100 Td");
    assert!(outcome.is_err());
}
1945
#[test]
fn test_error_handling_invalid_operator() {
    // An unrecognised operator keyword is expected to be rejected.
    let outcome = ContentParser::parse(b"100 200 INVALID");
    assert!(outcome.is_err());
}
1952
#[test]
fn test_error_handling_malformed_string() {
    // An unterminated literal string. This test accepts either `Ok` or `Err`
    // from the tokenizer; the only requirement is that the call returns
    // instead of panicking. The previous `is_ok() || is_err()` assertion was
    // a tautology, so the result is discarded explicitly.
    let mut tokenizer = ContentTokenizer::new(b"(Unclosed string");
    let _ = tokenizer.next_token();
}
1963
#[test]
fn test_error_handling_malformed_hex_string() {
    // `G` is not a hexadecimal digit, so tokenizing is expected to fail.
    let mut lexer = ContentTokenizer::new(b"<48656C6C6G>");
    assert!(lexer.next_token().is_err());
}
1971
#[test]
fn test_error_handling_malformed_name() {
    // `#GG` is not a valid two-digit hex escape, so tokenizing is expected to fail.
    let mut lexer = ContentTokenizer::new(b"/Name#GG");
    assert!(lexer.next_token().is_err());
}
1979
#[test]
fn test_empty_content_stream() {
    // Zero bytes in, zero operations out, and no error.
    let parsed = ContentParser::parse(b"").unwrap();
    assert!(parsed.is_empty());
}
1986
#[test]
fn test_whitespace_only_content_stream() {
    // A stream containing nothing but whitespace yields no operations.
    let parsed = ContentParser::parse(b" \t\n\r ").unwrap();
    assert!(parsed.is_empty());
}
1993
#[test]
fn test_mixed_integer_and_real_operands() {
    // Integer-spelled operands must surface as floating-point coordinates.
    let parsed = ContentParser::parse(b"100 200 m 150 200 l").unwrap();

    let expected = vec![
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
    ];
    assert_eq!(parsed, expected);
}
2004
#[test]
fn test_negative_operands() {
    // Negative integers and negative reals as positioning operands.
    let parsed = ContentParser::parse(b"-100 -200 Td -50.5 -75.2 TD").unwrap();

    let expected = vec![
        ContentOperation::MoveText(-100.0, -200.0),
        ContentOperation::MoveTextSetLeading(-50.5, -75.2),
    ];
    assert_eq!(parsed, expected);
}
2017
#[test]
fn test_large_numbers() {
    // Reals with many significant digits on both sides of the decimal point.
    let parsed = ContentParser::parse(b"999999.999999 -999999.999999 m").unwrap();

    let expected = vec![ContentOperation::MoveTo(999999.999999, -999999.999999)];
    assert_eq!(parsed, expected);
}
2029
#[test]
fn test_scientific_notation() {
    // NOTE(review): despite the test name, the input below contains only plain
    // decimal reals and no exponent form. Confirm whether that is intentional.
    let parsed = ContentParser::parse(b"123.45 -456.78 m").unwrap();

    let expected = vec![ContentOperation::MoveTo(123.45, -456.78)];
    assert_eq!(parsed, expected);
}
2039
#[test]
fn test_show_text_array_complex() {
    // `TJ` is handed a plain string rather than an array; parsing is expected to fail.
    let outcome = ContentParser::parse(b"(Hello) TJ");
    assert!(outcome.is_err());
}
2048
#[test]
fn test_dash_pattern_empty() {
    // `d` with a phase but no dash array; parsing is expected to fail.
    let outcome = ContentParser::parse(b"0 d");
    assert!(outcome.is_err());
}
2057
#[test]
fn test_dash_pattern_complex() {
    // `d` with a single real operand and no dash array; parsing is expected to fail.
    let outcome = ContentParser::parse(b"2.5 d");
    assert!(outcome.is_err());
}
2066
#[test]
fn test_pop_array_removes_array_end() {
    let parser = ContentParser::new(b"");

    // Properly terminated array: three elements come back and the operand
    // stack is left empty.
    let mut closed = vec![
        Token::ArrayStart,
        Token::Integer(1),
        Token::Integer(2),
        Token::Integer(3),
        Token::ArrayEnd,
    ];
    let items = parser.pop_array(&mut closed).unwrap();
    assert_eq!(items.len(), 3);
    assert!(closed.is_empty());

    // Array with no closing token: both elements are still returned and the
    // operand stack is left empty.
    let mut unclosed = vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)];
    let items = parser.pop_array(&mut unclosed).unwrap();
    assert_eq!(items.len(), 2);
    assert!(unclosed.is_empty());
}
2090
#[test]
fn test_dash_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    // A real and an integer token both come back as f32 dash lengths.
    let dashes = parser
        .parse_dash_array(vec![Token::Number(3.0), Token::Integer(2)])
        .unwrap();
    assert_eq!(dashes, vec![3.0, 2.0]);

    // An empty token list is valid and produces an empty dash array.
    let no_dashes = parser.parse_dash_array(vec![]).unwrap();
    assert_eq!(no_dashes, Vec::<f32>::new());
}
2107
#[test]
fn test_text_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    // string, numeric adjustment, string: three elements are expected back.
    let tokens = vec![
        Token::String(b"Hello".to_vec()),
        Token::Number(-100.0),
        Token::String(b"World".to_vec()),
    ];
    let elements = parser.parse_text_array(tokens).unwrap();
    assert_eq!(elements.len(), 3);
}
2122
#[test]
fn test_inline_image_handling() {
    // Inline image whose dictionary uses abbreviated keys and an abbreviated
    // colour-space name.
    let stream = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 1);

    if let ContentOperation::InlineImage { params, data: _ } = &parsed[0] {
        // The assertions look the parameters up under their full-length names.
        let expected = [
            ("Width", Object::Integer(100)),
            ("Height", Object::Integer(100)),
            ("BitsPerComponent", Object::Integer(8)),
            ("ColorSpace", Object::Name("DeviceRGB".to_string())),
        ];
        for (key, want) in expected.iter() {
            assert_eq!(params.get(*key), Some(want));
        }
    } else {
        panic!("Expected InlineImage operation");
    }
}
2144
#[test]
fn test_inline_image_with_filter() {
    // Inline image with an abbreviated filter entry alongside the usual keys.
    let stream = b"BI /W 50 /H 50 /CS /G /BPC 1 /F /AHx ID 00FF00FF EI";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 1);

    if let ContentOperation::InlineImage { params, data: _ } = &parsed[0] {
        // The assertions look the parameters up under their full-length names.
        let expected = [
            ("Width", Object::Integer(50)),
            ("Height", Object::Integer(50)),
            ("ColorSpace", Object::Name("DeviceGray".to_string())),
            ("BitsPerComponent", Object::Integer(1)),
            ("Filter", Object::Name("ASCIIHexDecode".to_string())),
        ];
        for (key, want) in expected.iter() {
            assert_eq!(params.get(*key), Some(want));
        }
    } else {
        panic!("Expected InlineImage operation");
    }
}
2168
#[test]
fn test_content_parser_performance() {
    // 1000 consecutive `x y m` operations.
    let content: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i + 1).into_bytes())
        .collect();

    let timer = std::time::Instant::now();
    let parsed = ContentParser::parse(&content).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(parsed.len(), 1000);
    // NOTE(review): a wall-clock bound like this may be flaky on slow or
    // heavily loaded machines.
    assert!(elapsed < std::time::Duration::from_millis(100));
}
2183
#[test]
fn test_tokenizer_performance() {
    // 1000 pairs of integers, i.e. 2000 tokens.
    let input: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} ", i, i + 1).into_bytes())
        .collect();

    let timer = std::time::Instant::now();
    let mut lexer = ContentTokenizer::new(&input);
    // Pull tokens until the tokenizer reports end of input.
    let produced = std::iter::from_fn(|| lexer.next_token().unwrap()).count();
    let elapsed = timer.elapsed();

    assert_eq!(produced, 2000);
    // NOTE(review): a wall-clock bound like this may be flaky on slow or
    // heavily loaded machines.
    assert!(elapsed < std::time::Duration::from_millis(50));
}
2202
#[test]
fn test_memory_usage_large_content() {
    // 10 000 six-operand `c` operations in one stream.
    let mut content = Vec::new();
    for i in 0..10000 {
        content.extend_from_slice(
            format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5)
                .as_bytes(),
        );
    }

    let operators = ContentParser::parse(&content).unwrap();
    assert_eq!(operators.len(), 10000);

    // Every operation must be a CurveTo. The `matches!` result used to be
    // discarded, so this loop verified nothing; it is now asserted.
    for op in &operators {
        assert!(
            matches!(op, ContentOperation::CurveTo(..)),
            "unexpected operation: {:?}",
            op
        );
    }
}
2221
#[test]
fn test_concurrent_parsing() {
    use std::sync::Arc;
    use std::thread;

    // Ten threads parse the same shared buffer; each must get the same result.
    let shared = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());

    let mut workers = Vec::with_capacity(10);
    for _ in 0..10 {
        let bytes = Arc::clone(&shared);
        workers.push(thread::spawn(move || {
            ContentParser::parse(&bytes).unwrap()
        }));
    }

    for worker in workers {
        let parsed = worker.join().unwrap();
        assert_eq!(parsed.len(), 5);
        assert_eq!(parsed[0], ContentOperation::BeginText);
        assert_eq!(parsed[4], ContentOperation::EndText);
    }
}
2242
#[test]
fn test_tokenizer_hex_string_edge_cases() {
    // `<>` is an empty hex string.
    let empty = ContentTokenizer::new(b"<>").next_token().unwrap().unwrap();
    assert_eq!(
        empty,
        Token::HexString(Vec::new()),
        "Expected empty hex string"
    );

    // An odd number of digits: the last nibble is expected to be padded with 0.
    let odd = ContentTokenizer::new(b"<123>")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(
        odd,
        Token::HexString(vec![0x12, 0x30]),
        "Expected hex string with odd digits"
    );

    // Whitespace between digit pairs is ignored.
    let spaced = ContentTokenizer::new(b"<12 34\t56\n78>")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(
        spaced,
        Token::HexString(vec![0x12, 0x34, 0x56, 0x78]),
        "Expected hex string with whitespace"
    );
}
2270
#[test]
fn test_tokenizer_literal_string_escape_sequences() {
    // Single-character escapes: \n \r \t \b \f \( \) \\
    let simple = ContentTokenizer::new(b"(\\n\\r\\t\\b\\f\\(\\)\\\\)")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(
        simple,
        Token::String(vec![b'\n', b'\r', b'\t', 0x08, 0x0C, b'(', b')', b'\\']),
        "Expected string with escapes"
    );

    // Three-digit octal escapes: \101 = 'A', \040 = ' ', \377 = 255.
    let octal = ContentTokenizer::new(b"(\\101\\040\\377)")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(
        octal,
        Token::String(vec![b'A', b' ', 255]),
        "Expected string with octal escapes"
    );
}
2294
#[test]
fn test_tokenizer_nested_parentheses() {
    // Balanced inner parentheses belong to the string content.
    let one_level = ContentTokenizer::new(b"(outer (inner) text)")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(
        one_level,
        Token::String(b"outer (inner) text".to_vec()),
        "Expected string with nested parentheses"
    );

    // The same holds for deeper nesting.
    let deep = ContentTokenizer::new(b"(level1 (level2 (level3) back2) back1)")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(
        deep,
        Token::String(b"level1 (level2 (level3) back2) back1".to_vec()),
        "Expected string with deep nesting"
    );
}
2316
#[test]
fn test_tokenizer_name_hex_escapes() {
    // `#20` decodes to a space.
    let spaced = ContentTokenizer::new(b"/Name#20With#20Spaces")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(
        spaced,
        Token::Name("Name With Spaces".to_string()),
        "Expected name with hex escapes"
    );

    // Escaped `/`, `(`, `)`, `<` and `>` decode to the literal characters.
    let special = ContentTokenizer::new(b"/Special#2F#28#29#3C#3E")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(
        special,
        Token::Name("Special/()<>".to_string()),
        "Expected name with special character escapes"
    );
}
2334
#[test]
fn test_tokenizer_number_edge_cases() {
    // The largest value that fits a signed 32-bit integer.
    let big = ContentTokenizer::new(b"2147483647")
        .next_token()
        .unwrap()
        .unwrap();
    assert_eq!(big, Token::Integer(2147483647), "Expected large integer");

    // A very small real, compared with an epsilon tolerance.
    let tiny = ContentTokenizer::new(b"0.00001")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::Number(value) = tiny {
        assert!((value - 0.00001).abs() < f32::EPSILON);
    } else {
        panic!("Expected small float");
    }

    // A real with no digits before the decimal point.
    let dotted = ContentTokenizer::new(b".5").next_token().unwrap().unwrap();
    if let Token::Number(value) = dotted {
        assert!((value - 0.5).abs() < f32::EPSILON);
    } else {
        panic!("Expected float starting with dot");
    }
}
2361
#[test]
fn test_parser_complex_path_operations() {
    // A closed four-point path followed by a fill.
    let stream = b"100 200 m 150 200 l 150 250 l 100 250 l h f";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = vec![
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
        ContentOperation::LineTo(100.0, 250.0),
        ContentOperation::ClosePath,
        ContentOperation::Fill,
    ];
    assert_eq!(parsed, expected);
}
2375
#[test]
fn test_parser_bezier_curves() {
    // A single `c` operator with its six operands.
    let content = b"100 100 150 50 200 150 c";
    let operators = ContentParser::parse(content).unwrap();

    assert_eq!(operators.len(), 1);
    match &operators[0] {
        ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3) => {
            // Pin every coordinate exactly. The earlier range/finiteness
            // checks would have passed even with swapped or mis-parsed
            // control points.
            assert_eq!(
                (*x1, *y1, *x2, *y2, *x3, *y3),
                (100.0, 100.0, 150.0, 50.0, 200.0, 150.0)
            );
        }
        _ => panic!("Expected CurveTo operation"),
    }
}
2397
#[test]
fn test_parser_color_operations() {
    // Five colour operators; only the first two are inspected in detail.
    let stream = b"0.5 g 1 0 0 rg 0 1 0 1 k /DeviceRGB cs 0.2 0.4 0.6 sc";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 5);

    assert_eq!(
        parsed[0],
        ContentOperation::SetNonStrokingGray(0.5),
        "Expected SetNonStrokingGray"
    );
    assert_eq!(
        parsed[1],
        ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0),
        "Expected SetNonStrokingRGB"
    );
}
2415
#[test]
fn test_parser_text_positioning_advanced() {
    // Text matrix, two leading changes and two `'` show-text operators.
    let stream = b"BT 1 0 0 1 100 200 Tm 0 TL 10 TL (Line 1) ' (Line 2) ' ET";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 7);

    // Only the first, second and last operations are inspected.
    assert_eq!(parsed[0], ContentOperation::BeginText);
    assert_eq!(
        parsed[1],
        ContentOperation::SetTextMatrix(1.0, 0.0, 0.0, 1.0, 100.0, 200.0),
        "Expected SetTextMatrix"
    );
    assert_eq!(parsed[6], ContentOperation::EndText);
}
2431
#[test]
fn test_parser_graphics_state_operations() {
    // Save, scale and translate, set line parameters, restore.
    let stream = b"q 2 0 0 2 100 100 cm 5 w 1 J 2 j 10 M Q";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 7);

    // Only the first, second and last operations are inspected.
    assert_eq!(parsed[0], ContentOperation::SaveGraphicsState);
    assert_eq!(
        parsed[1],
        ContentOperation::SetTransformMatrix(2.0, 0.0, 0.0, 2.0, 100.0, 100.0),
        "Expected SetTransformMatrix"
    );
    assert_eq!(parsed[6], ContentOperation::RestoreGraphicsState);
}
2447
#[test]
fn test_parser_xobject_operations() {
    // Three `Do` operators, each naming a different resource.
    let stream = b"/Image1 Do /Form2 Do /Pattern3 Do";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 3);

    let names = ["Image1", "Form2", "Pattern3"];
    for (op, expected_name) in parsed.iter().zip(names.iter()) {
        if let ContentOperation::PaintXObject(name) = op {
            assert_eq!(name, expected_name);
        } else {
            panic!("Expected PaintXObject");
        }
    }
}
2461
#[test]
fn test_parser_marked_content_operations() {
    // A show-text operator wrapped in a BMC/EMC marked-content pair.
    let stream = b"/P BMC (Tagged content) Tj EMC";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 3);

    assert_eq!(
        parsed[0],
        ContentOperation::BeginMarkedContent("P".to_string()),
        "Expected BeginMarkedContent"
    );
    assert_eq!(parsed[2], ContentOperation::EndMarkedContent);
}
2474
#[test]
fn test_parser_error_handling_invalid_operators() {
    // `m` with no operands at all must be rejected.
    assert!(ContentParser::parse(b"m").is_err());

    // A hex string that is never closed with `>` must be rejected.
    assert!(ContentParser::parse(b"<ABC DEF BT").is_err());

    // Operands that are never followed by an operator are accepted.
    assert!(ContentParser::parse(b"100 200 300").is_ok());
}
2492
#[test]
fn test_parser_whitespace_tolerance() {
    // Mixed whitespace around operands and operator must not affect the result.
    let parsed = ContentParser::parse(b" \n\t 100 \r\n 200 \t m \n").unwrap();

    let expected = vec![ContentOperation::MoveTo(100.0, 200.0)];
    assert_eq!(parsed, expected);
}
2501
#[test]
fn test_tokenizer_comment_handling() {
    // `%` comments between operands and after the operator are skipped.
    let stream = b"100 % This is a comment\n200 m % Another comment";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = vec![ContentOperation::MoveTo(100.0, 200.0)];
    assert_eq!(parsed, expected);
}
2510
#[test]
fn test_parser_stream_with_binary_data() {
    // A non-ASCII byte (0xFF) inside a comment must not disturb parsing.
    let stream = b"100 200 m % Comment with \xFF binary\n150 250 l";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = vec![
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
    ];
    assert_eq!(parsed, expected);
}
2521
#[test]
fn test_tokenizer_array_parsing() {
    // NOTE(review): despite the test name, the input below contains no array
    // tokens, only two simple path operators. Confirm whether that is
    // intentional.
    let parsed = ContentParser::parse(b"100 200 m 150 250 l").unwrap();

    let expected = vec![
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
    ];
    assert_eq!(parsed, expected);
}
2532
#[test]
fn test_parser_rectangle_operations() {
    // Two `re` operators with distinct operand sets.
    let stream = b"10 20 100 50 re 0 0 200 300 re";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 2);

    assert_eq!(
        parsed[0],
        ContentOperation::Rectangle(10.0, 20.0, 100.0, 50.0),
        "Expected Rectangle operation"
    );
    assert_eq!(
        parsed[1],
        ContentOperation::Rectangle(0.0, 0.0, 200.0, 300.0),
        "Expected Rectangle operation"
    );
}
2552
#[test]
fn test_parser_clipping_operations() {
    // Two rectangles, each used as a clip path (`W` and `W*`) and ended with `n`.
    let stream = b"100 100 50 50 re W n 200 200 75 75 re W* n";
    let parsed = ContentParser::parse(stream).unwrap();
    assert_eq!(parsed.len(), 6);

    // The rectangles at indices 0 and 3 are not inspected here.
    let checks = [
        (1, ContentOperation::Clip),
        (2, ContentOperation::EndPath),
        (4, ContentOperation::ClipEvenOdd),
        (5, ContentOperation::EndPath),
    ];
    for (idx, want) in checks.iter() {
        assert_eq!(&parsed[*idx], want);
    }
}
2564
#[test]
fn test_parser_painting_operations() {
    // The eight stroke/fill painting operators, once each.
    let parsed = ContentParser::parse(b"S s f f* B B* b b*").unwrap();

    let expected = vec![
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
    ];
    assert_eq!(parsed, expected);
}
2580
/// Line-style operators (`w`, `J`, `j`, `M`, `d`) must each parse to their
/// dedicated `ContentOperation` variant.
///
/// The dash-pattern operation is verified by variant only; its array and
/// phase payload are not pinned by this test.
#[test]
fn test_parser_line_style_operations() {
    let content = b"5 w 1 J 2 j 10 M [ 3 2 ] 0 d";
    let operators = ContentParser::parse(content).unwrap();

    assert_eq!(operators.len(), 5);
    assert_eq!(operators[0], ContentOperation::SetLineWidth(5.0));
    assert_eq!(operators[1], ContentOperation::SetLineCap(1));
    assert_eq!(operators[2], ContentOperation::SetLineJoin(2));
    assert_eq!(operators[3], ContentOperation::SetMiterLimit(10.0));

    // The fifth operation originates from `[ 3 2 ] 0 d`. Without this check the
    // test counted five operations but never looked at the last one.
    assert!(
        matches!(operators[4], ContentOperation::SetDashPattern(..)),
        "Expected SetDashPattern operation, got {:?}",
        operators[4]
    );
}
2593
/// Text-state operators (`Tc`, `Tw`, `Tz`, `Tr`, `Ts`) must parse to the
/// matching setter operations with their numeric operand.
#[test]
fn test_parser_text_state_operations() {
    let stream = b"12 Tc 3 Tw 100 Tz 1 Tr 2 Ts";
    let parsed = ContentParser::parse(stream).unwrap();

    // One expected operation per operator in the stream, in order.
    let expected = [
        ContentOperation::SetCharSpacing(12.0),
        ContentOperation::SetWordSpacing(3.0),
        ContentOperation::SetHorizontalScaling(100.0),
        ContentOperation::SetTextRenderMode(1),
        ContentOperation::SetTextRise(2.0),
    ];

    assert_eq!(parsed.len(), 5);
    for (actual, want) in parsed.iter().zip(expected.iter()) {
        assert_eq!(actual, want);
    }
}
2606
/// A string literal containing multi-byte UTF-8 sequences (the bytes for `©`
/// and `✓`) must still be delivered as a `ShowText` between `BT` and `ET`.
#[test]
fn test_parser_unicode_text() {
    let stream = b"BT (Hello \xC2\xA9 World \xE2\x9C\x93) Tj ET";
    let parsed = ContentParser::parse(stream).unwrap();

    assert_eq!(parsed.len(), 3);
    assert_eq!(parsed[0], ContentOperation::BeginText);

    // Only a lower bound on the payload length is checked, not its exact bytes.
    if let ContentOperation::ShowText(bytes) = &parsed[1] {
        assert!(bytes.len() > 5);
    } else {
        panic!("Expected ShowText operation");
    }

    assert_eq!(parsed[2], ContentOperation::EndText);
}
2622
/// Large-magnitude positive and negative operands of a `c` operator must come
/// back (within a 0.1 tolerance) in the resulting `CurveTo`.
///
/// Only the first control point and the x coordinate of the end point are
/// compared.
#[test]
fn test_parser_stress_test_large_coordinates() {
    let stream = b"999999.999 -999999.999 999999.999 -999999.999 999999.999 -999999.999 c";
    let parsed = ContentParser::parse(stream).unwrap();

    // Absolute-difference comparison with the tolerance used throughout this test.
    let close_to = |actual: f32, wanted: f32| (actual - wanted).abs() < 0.1;

    assert_eq!(parsed.len(), 1);
    if let ContentOperation::CurveTo(x1, y1, _, _, x3, _) = &parsed[0] {
        assert!(close_to(*x1, 999999.999));
        assert!(close_to(*y1, -999999.999));
        assert!(close_to(*x3, 999999.999));
    } else {
        panic!("Expected CurveTo operation");
    }
}
2638
/// An empty stream and a whitespace-only stream must both parse successfully
/// into zero operations.
#[test]
fn test_parser_empty_content_stream() {
    // First the truly empty input, then one made only of whitespace bytes.
    let blank_streams: [&[u8]; 2] = [b"", b" \n\t\r "];

    for stream in blank_streams.iter() {
        let parsed = ContentParser::parse(*stream).unwrap();
        assert!(parsed.is_empty());
    }
}
2649
/// A comment containing a non-UTF-8 byte (`\xFF`) must not make the parser
/// panic.
///
/// Both `Ok` and `Err` are accepted outcomes. When parsing succeeds, the `m`
/// operator written before the comment must still be reported as the first
/// operation.
#[test]
fn test_tokenizer_error_recovery() {
    let content = b"100 200 m % Comment with\xFFbinary\n150 250 l";
    let result = ContentParser::parse(content);

    // Asserting `is_ok() || is_err()` can never fail, so only the success path
    // is inspected; an error is tolerated silently.
    if let Ok(operators) = result {
        assert_eq!(
            operators.first(),
            Some(&ContentOperation::MoveTo(100.0, 200.0))
        );
    }
}
2658
/// Parses 1000 back-to-back `m` operators and checks both the operation count
/// and that parsing finished in under 200 ms of wall-clock time.
#[test]
fn test_parser_optimization_repeated_operations() {
    // Builds "0 0 m 1 2 m 2 4 m ..." as raw bytes.
    let stream: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i * 2).into_bytes())
        .collect();

    let timer = std::time::Instant::now();
    let parsed = ContentParser::parse(&stream).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(parsed.len(), 1000);
    // NOTE(review): a wall-clock bound may be sensitive to machine load.
    assert!(elapsed < std::time::Duration::from_millis(200));
}
2674
/// A 10,000-byte string literal must survive parsing intact: the `ShowText`
/// payload has to be exactly as long as the text that was written.
#[test]
fn test_parser_memory_efficiency_large_strings() {
    const PAYLOAD_LEN: usize = 10000;

    let stream = format!("BT ({}) Tj ET", "A".repeat(PAYLOAD_LEN));
    let parsed = ContentParser::parse(stream.as_bytes()).unwrap();

    assert_eq!(parsed.len(), 3);
    if let ContentOperation::ShowText(bytes) = &parsed[1] {
        assert_eq!(bytes.len(), PAYLOAD_LEN);
    } else {
        panic!("Expected ShowText operation");
    }
}
2690 }
2691
/// Parses a stream of 10,000 `m` operators followed by one `S`.
///
/// Despite the test's name, success is expected: the result must be `Ok` and
/// contain more than 10,000 operations.
#[test]
fn test_content_stream_too_large() {
    let mut stream: Vec<u8> = (0..10000)
        .flat_map(|i| format!("{} {} m ", i, i).into_bytes())
        .collect();
    // Trailing stroke operator after all the moves.
    stream.push(b'S');

    let outcome = ContentParser::parse_content(&stream);
    assert!(outcome.is_ok());

    assert!(outcome.unwrap().len() > 10000);
}
2711
/// An unknown operator (`INVALID_OP`) in the middle of the stream may make
/// parsing fail; if parsing succeeds, the trailing `m` must still be reported.
#[test]
fn test_invalid_operator_handling() {
    let stream = b"100 200 INVALID_OP 300 400 m";

    // An `Err` result is acceptable and counts as a pass; a successful parse
    // is required to contain at least one `MoveTo`.
    let acceptable = ContentParser::parse_content(stream).map_or(true, |parsed| {
        parsed
            .iter()
            .any(|op| matches!(op, ContentOperation::MoveTo(_, _)))
    });

    assert!(acceptable);
}
2726
/// Feeds an oddly nested array operand for `TJ` through the parser.
///
/// No particular outcome is required: both `Ok` and `Err` are accepted, so
/// the test fails only if parsing panics.
#[test]
fn test_nested_arrays_malformed() {
    let content = b"[[(Hello] [World)]] TJ";

    // Asserting `is_ok() || is_err()` would always hold, so the outcome is
    // discarded explicitly instead.
    let _ = ContentParser::parse_content(content);
}
2736
/// Backslash escapes inside string literals must be decoded before the bytes
/// reach `ShowText`.
///
/// Covered: `\n \r \t`, an escaped backslash, escaped parentheses, the octal
/// escape `\123` (expected to decode to `S`) and the short octal escape `\0`
/// (expected to decode to a NUL byte).
#[test]
fn test_escape_sequences_in_strings() {
    // (string literal as written in the stream, expected decoded bytes)
    let cases: [(&[u8], &[u8]); 5] = [
        (b"(\\n\\r\\t)", b"\n\r\t"),
        (b"(\\\\)", b"\\"),
        (b"(\\(\\))", b"()"),
        (b"(\\123)", b"S"),
        (b"(\\0)", b"\0"),
    ];

    for &(input, expected) in cases.iter() {
        // Append the show-text operator so the literal is consumed as its operand.
        let mut stream = input.to_vec();
        stream.extend_from_slice(b" Tj");

        let outcome = ContentParser::parse_content(&stream);
        assert!(outcome.is_ok());

        let parsed = outcome.unwrap();
        match &parsed[0] {
            ContentOperation::ShowText(text) => {
                assert_eq!(text.as_slice(), expected, "Failed for input: {:?}", input);
            }
            _ => panic!("Expected ShowText operation"),
        }
    }
}
2764
/// Feeds a `BI ... ID ... EI` inline image carrying raw binary bytes through
/// the parser.
///
/// No particular outcome is required: both `Ok` and `Err` are accepted, so
/// the test fails only if parsing panics.
#[test]
fn test_content_with_inline_images() {
    let content = b"BI /W 10 /H 10 /CS /RGB ID \x00\x01\x02\x03 EI";

    // Asserting `is_ok() || is_err()` would always hold, so the outcome is
    // discarded explicitly instead.
    let _ = ContentParser::parse_content(content);
}
2774
/// Feeds operators that appear without any preceding operands (`Tj`, `m`,
/// `rg`, `Tf`) through the parser.
///
/// No particular outcome is required for any of them: both `Ok` and `Err`
/// are accepted, so the test fails only if parsing panics.
#[test]
fn test_operator_with_missing_operands() {
    let test_cases: [&[u8]; 4] = [b"Tj", b"m", b"rg", b"Tf"];

    for content in test_cases.iter().copied() {
        // Asserting `is_ok() || is_err()` would always hold, so the outcome
        // is discarded explicitly instead.
        let _ = ContentParser::parse_content(content);
    }
}
2791}