1use super::{ParseError, ParseResult};
57use std::collections::HashMap;
58
59#[derive(Debug, Clone, PartialEq)]
93pub enum ContentOperation {
94 BeginText,
98
99 EndText,
102
103 SetCharSpacing(f32),
107
108 SetWordSpacing(f32),
111
112 SetHorizontalScaling(f32),
115
116 SetLeading(f32),
119
120 SetFont(String, f32),
123
124 SetTextRenderMode(i32),
127
128 SetTextRise(f32),
131
132 MoveText(f32, f32),
136
137 MoveTextSetLeading(f32, f32),
140
141 SetTextMatrix(f32, f32, f32, f32, f32, f32),
144
145 NextLine,
148
149 ShowText(Vec<u8>),
153
154 ShowTextArray(Vec<TextElement>),
157
158 NextLineShowText(Vec<u8>),
161
162 SetSpacingNextLineShowText(f32, f32, Vec<u8>),
165
166 SaveGraphicsState,
170
171 RestoreGraphicsState,
174
175 SetTransformMatrix(f32, f32, f32, f32, f32, f32),
178
179 SetLineWidth(f32),
181
182 SetLineCap(i32),
185
186 SetLineJoin(i32),
189
190 SetMiterLimit(f32),
193
194 SetDashPattern(Vec<f32>, f32),
197
198 SetIntent(String),
201
202 SetFlatness(f32),
205
206 SetGraphicsStateParams(String),
209
210 MoveTo(f32, f32),
213
214 LineTo(f32, f32),
216
217 CurveTo(f32, f32, f32, f32, f32, f32),
220
221 CurveToV(f32, f32, f32, f32),
223
224 CurveToY(f32, f32, f32, f32),
226
227 ClosePath,
230
231 Rectangle(f32, f32, f32, f32),
234
235 Stroke,
238
239 CloseStroke,
242
243 Fill,
245
246 FillEvenOdd,
248
249 FillStroke,
252
253 FillStrokeEvenOdd,
255
256 CloseFillStroke,
259
260 CloseFillStrokeEvenOdd,
262
263 EndPath,
266
267 Clip, ClipEvenOdd, SetStrokingColorSpace(String),
275
276 SetNonStrokingColorSpace(String),
279
280 SetStrokingColor(Vec<f32>),
283
284 SetNonStrokingColor(Vec<f32>),
287
288 SetStrokingGray(f32),
291
292 SetNonStrokingGray(f32),
294
295 SetStrokingRGB(f32, f32, f32),
298
299 SetNonStrokingRGB(f32, f32, f32),
301
302 SetStrokingCMYK(f32, f32, f32, f32),
304
305 SetNonStrokingCMYK(f32, f32, f32, f32),
307
308 ShadingFill(String), BeginInlineImage, InlineImageData(Vec<u8>), PaintXObject(String),
319
320 BeginMarkedContent(String), BeginMarkedContentWithProps(String, HashMap<String, String>), EndMarkedContent, DefineMarkedContentPoint(String), DefineMarkedContentPointWithProps(String, HashMap<String, String>), BeginCompatibility, EndCompatibility, }
331
/// One entry of the array operand consumed by the `TJ` operator.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// Raw string bytes, taken from a literal or hex string token.
    Text(Vec<u8>),
    /// A numeric entry (integer or real token), stored as `f32`.
    Spacing(f32),
}
358
359#[derive(Debug, Clone, PartialEq)]
361pub(super) enum Token {
362 Number(f32),
363 Integer(i32),
364 String(Vec<u8>),
365 HexString(Vec<u8>),
366 Name(String),
367 Operator(String),
368 ArrayStart,
369 ArrayEnd,
370 DictStart,
371 DictEnd,
372}
373
374pub struct ContentTokenizer<'a> {
376 input: &'a [u8],
377 position: usize,
378}
379
380impl<'a> ContentTokenizer<'a> {
381 pub fn new(input: &'a [u8]) -> Self {
383 Self { input, position: 0 }
384 }
385
386 pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
388 self.skip_whitespace();
389
390 if self.position >= self.input.len() {
391 return Ok(None);
392 }
393
394 let ch = self.input[self.position];
395
396 match ch {
397 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
399
400 b'(' => self.read_literal_string(),
402 b'<' => {
403 if self.peek_next() == Some(b'<') {
404 self.position += 2;
405 Ok(Some(Token::DictStart))
406 } else {
407 self.read_hex_string()
408 }
409 }
410 b'>' => {
411 if self.peek_next() == Some(b'>') {
412 self.position += 2;
413 Ok(Some(Token::DictEnd))
414 } else {
415 Err(ParseError::SyntaxError {
416 position: self.position,
417 message: "Unexpected '>'".to_string(),
418 })
419 }
420 }
421
422 b'[' => {
424 self.position += 1;
425 Ok(Some(Token::ArrayStart))
426 }
427 b']' => {
428 self.position += 1;
429 Ok(Some(Token::ArrayEnd))
430 }
431
432 b'/' => self.read_name(),
434
435 _ => self.read_operator(),
437 }
438 }
439
440 fn skip_whitespace(&mut self) {
441 while self.position < self.input.len() {
442 match self.input[self.position] {
443 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
444 b'%' => self.skip_comment(),
445 _ => break,
446 }
447 }
448 }
449
450 fn skip_comment(&mut self) {
451 while self.position < self.input.len() && self.input[self.position] != b'\n' {
452 self.position += 1;
453 }
454 }
455
456 fn peek_next(&self) -> Option<u8> {
457 if self.position + 1 < self.input.len() {
458 Some(self.input[self.position + 1])
459 } else {
460 None
461 }
462 }
463
464 fn read_number(&mut self) -> ParseResult<Option<Token>> {
465 let start = self.position;
466 let mut has_dot = false;
467
468 if self.position < self.input.len()
470 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
471 {
472 self.position += 1;
473 }
474
475 while self.position < self.input.len() {
477 match self.input[self.position] {
478 b'0'..=b'9' => self.position += 1,
479 b'.' if !has_dot => {
480 has_dot = true;
481 self.position += 1;
482 }
483 _ => break,
484 }
485 }
486
487 let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
488 ParseError::SyntaxError {
489 position: start,
490 message: "Invalid number format".to_string(),
491 }
492 })?;
493
494 if has_dot {
495 let value = num_str
496 .parse::<f32>()
497 .map_err(|_| ParseError::SyntaxError {
498 position: start,
499 message: "Invalid float number".to_string(),
500 })?;
501 Ok(Some(Token::Number(value)))
502 } else {
503 let value = num_str
504 .parse::<i32>()
505 .map_err(|_| ParseError::SyntaxError {
506 position: start,
507 message: "Invalid integer number".to_string(),
508 })?;
509 Ok(Some(Token::Integer(value)))
510 }
511 }
512
513 fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
514 self.position += 1; let mut result = Vec::new();
516 let mut paren_depth = 1;
517 let mut escape = false;
518
519 while self.position < self.input.len() && paren_depth > 0 {
520 let ch = self.input[self.position];
521 self.position += 1;
522
523 if escape {
524 match ch {
525 b'n' => result.push(b'\n'),
526 b'r' => result.push(b'\r'),
527 b't' => result.push(b'\t'),
528 b'b' => result.push(b'\x08'),
529 b'f' => result.push(b'\x0C'),
530 b'(' => result.push(b'('),
531 b')' => result.push(b')'),
532 b'\\' => result.push(b'\\'),
533 b'0'..=b'7' => {
534 self.position -= 1;
536 let octal_value = self.read_octal_escape()?;
537 result.push(octal_value);
538 }
539 _ => result.push(ch), }
541 escape = false;
542 } else {
543 match ch {
544 b'\\' => escape = true,
545 b'(' => {
546 paren_depth += 1;
547 result.push(ch);
548 }
549 b')' => {
550 paren_depth -= 1;
551 if paren_depth > 0 {
552 result.push(ch);
553 }
554 }
555 _ => result.push(ch),
556 }
557 }
558 }
559
560 Ok(Some(Token::String(result)))
561 }
562
563 fn read_octal_escape(&mut self) -> ParseResult<u8> {
564 let mut value = 0u8;
565 let mut count = 0;
566
567 while count < 3 && self.position < self.input.len() {
568 match self.input[self.position] {
569 b'0'..=b'7' => {
570 value = value * 8 + (self.input[self.position] - b'0');
571 self.position += 1;
572 count += 1;
573 }
574 _ => break,
575 }
576 }
577
578 Ok(value)
579 }
580
581 fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
582 self.position += 1; let mut result = Vec::new();
584 let mut nibble = None;
585
586 while self.position < self.input.len() {
587 let ch = self.input[self.position];
588
589 match ch {
590 b'>' => {
591 self.position += 1;
592 if let Some(n) = nibble {
594 result.push(n << 4);
595 }
596 return Ok(Some(Token::HexString(result)));
597 }
598 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
599 let digit = if ch <= b'9' {
600 ch - b'0'
601 } else if ch <= b'F' {
602 ch - b'A' + 10
603 } else {
604 ch - b'a' + 10
605 };
606
607 if let Some(n) = nibble {
608 result.push((n << 4) | digit);
609 nibble = None;
610 } else {
611 nibble = Some(digit);
612 }
613 self.position += 1;
614 }
615 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
616 self.position += 1;
618 }
619 _ => {
620 return Err(ParseError::SyntaxError {
621 position: self.position,
622 message: format!("Invalid character in hex string: {:?}", ch as char),
623 });
624 }
625 }
626 }
627
628 Err(ParseError::SyntaxError {
629 position: self.position,
630 message: "Unterminated hex string".to_string(),
631 })
632 }
633
634 fn read_name(&mut self) -> ParseResult<Option<Token>> {
635 self.position += 1; let start = self.position;
637
638 while self.position < self.input.len() {
639 let ch = self.input[self.position];
640 match ch {
641 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
642 | b']' | b'{' | b'}' | b'/' | b'%' => break,
643 b'#' => {
644 self.position += 1;
646 if self.position + 1 < self.input.len() {
647 self.position += 2;
648 }
649 }
650 _ => self.position += 1,
651 }
652 }
653
654 let name_bytes = &self.input[start..self.position];
655 let name = self.decode_name(name_bytes)?;
656 Ok(Some(Token::Name(name)))
657 }
658
659 fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
660 let mut result = Vec::new();
661 let mut i = 0;
662
663 while i < bytes.len() {
664 if bytes[i] == b'#' && i + 2 < bytes.len() {
665 let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
667 ParseError::SyntaxError {
668 position: self.position,
669 message: "Invalid hex escape in name".to_string(),
670 }
671 })?;
672 let value =
673 u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
674 position: self.position,
675 message: "Invalid hex escape in name".to_string(),
676 })?;
677 result.push(value);
678 i += 3;
679 } else {
680 result.push(bytes[i]);
681 i += 1;
682 }
683 }
684
685 String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
686 position: self.position,
687 message: "Invalid UTF-8 in name".to_string(),
688 })
689 }
690
691 fn read_operator(&mut self) -> ParseResult<Option<Token>> {
692 let start = self.position;
693
694 while self.position < self.input.len() {
695 let ch = self.input[self.position];
696 match ch {
697 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
698 | b']' | b'{' | b'}' | b'/' | b'%' => break,
699 _ => self.position += 1,
700 }
701 }
702
703 let op_bytes = &self.input[start..self.position];
704 let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
705 position: start,
706 message: "Invalid operator".to_string(),
707 })?;
708
709 Ok(Some(Token::Operator(op.to_string())))
710 }
711}
712
713pub struct ContentParser {
732 tokens: Vec<Token>,
733 position: usize,
734}
735
736impl ContentParser {
737 pub fn new(_content: &[u8]) -> Self {
739 Self {
740 tokens: Vec::new(),
741 position: 0,
742 }
743 }
744
745 pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
779 Self::parse_content(content)
780 }
781
782 pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
787 let mut tokenizer = ContentTokenizer::new(content);
788 let mut tokens = Vec::new();
789
790 while let Some(token) = tokenizer.next_token()? {
792 tokens.push(token);
793 }
794
795 let mut parser = Self {
796 tokens,
797 position: 0,
798 };
799
800 parser.parse_operators()
801 }
802
803 fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
804 let mut operators = Vec::new();
805 let mut operand_stack: Vec<Token> = Vec::new();
806
807 while self.position < self.tokens.len() {
808 let token = self.tokens[self.position].clone();
809 self.position += 1;
810
811 match &token {
812 Token::Operator(op) => {
813 let operator = self.parse_operator(op, &mut operand_stack)?;
814 operators.push(operator);
815 }
816 _ => {
817 operand_stack.push(token);
819 }
820 }
821 }
822
823 Ok(operators)
824 }
825
826 fn parse_operator(
827 &mut self,
828 op: &str,
829 operands: &mut Vec<Token>,
830 ) -> ParseResult<ContentOperation> {
831 let operator = match op {
832 "BT" => ContentOperation::BeginText,
834 "ET" => ContentOperation::EndText,
835
836 "Tc" => {
838 let spacing = self.pop_number(operands)?;
839 ContentOperation::SetCharSpacing(spacing)
840 }
841 "Tw" => {
842 let spacing = self.pop_number(operands)?;
843 ContentOperation::SetWordSpacing(spacing)
844 }
845 "Tz" => {
846 let scale = self.pop_number(operands)?;
847 ContentOperation::SetHorizontalScaling(scale)
848 }
849 "TL" => {
850 let leading = self.pop_number(operands)?;
851 ContentOperation::SetLeading(leading)
852 }
853 "Tf" => {
854 let size = self.pop_number(operands)?;
855 let font = self.pop_name(operands)?;
856 ContentOperation::SetFont(font, size)
857 }
858 "Tr" => {
859 let mode = self.pop_integer(operands)?;
860 ContentOperation::SetTextRenderMode(mode)
861 }
862 "Ts" => {
863 let rise = self.pop_number(operands)?;
864 ContentOperation::SetTextRise(rise)
865 }
866
867 "Td" => {
869 let ty = self.pop_number(operands)?;
870 let tx = self.pop_number(operands)?;
871 ContentOperation::MoveText(tx, ty)
872 }
873 "TD" => {
874 let ty = self.pop_number(operands)?;
875 let tx = self.pop_number(operands)?;
876 ContentOperation::MoveTextSetLeading(tx, ty)
877 }
878 "Tm" => {
879 let f = self.pop_number(operands)?;
880 let e = self.pop_number(operands)?;
881 let d = self.pop_number(operands)?;
882 let c = self.pop_number(operands)?;
883 let b = self.pop_number(operands)?;
884 let a = self.pop_number(operands)?;
885 ContentOperation::SetTextMatrix(a, b, c, d, e, f)
886 }
887 "T*" => ContentOperation::NextLine,
888
889 "Tj" => {
891 let text = self.pop_string(operands)?;
892 ContentOperation::ShowText(text)
893 }
894 "TJ" => {
895 let array = self.pop_array(operands)?;
896 let elements = self.parse_text_array(array)?;
897 ContentOperation::ShowTextArray(elements)
898 }
899 "'" => {
900 let text = self.pop_string(operands)?;
901 ContentOperation::NextLineShowText(text)
902 }
903 "\"" => {
904 let text = self.pop_string(operands)?;
905 let aw = self.pop_number(operands)?;
906 let ac = self.pop_number(operands)?;
907 ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
908 }
909
910 "q" => ContentOperation::SaveGraphicsState,
912 "Q" => ContentOperation::RestoreGraphicsState,
913 "cm" => {
914 let f = self.pop_number(operands)?;
915 let e = self.pop_number(operands)?;
916 let d = self.pop_number(operands)?;
917 let c = self.pop_number(operands)?;
918 let b = self.pop_number(operands)?;
919 let a = self.pop_number(operands)?;
920 ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
921 }
922 "w" => {
923 let width = self.pop_number(operands)?;
924 ContentOperation::SetLineWidth(width)
925 }
926 "J" => {
927 let cap = self.pop_integer(operands)?;
928 ContentOperation::SetLineCap(cap)
929 }
930 "j" => {
931 let join = self.pop_integer(operands)?;
932 ContentOperation::SetLineJoin(join)
933 }
934 "M" => {
935 let limit = self.pop_number(operands)?;
936 ContentOperation::SetMiterLimit(limit)
937 }
938 "d" => {
939 let phase = self.pop_number(operands)?;
940 let array = self.pop_array(operands)?;
941 let pattern = self.parse_dash_array(array)?;
942 ContentOperation::SetDashPattern(pattern, phase)
943 }
944 "ri" => {
945 let intent = self.pop_name(operands)?;
946 ContentOperation::SetIntent(intent)
947 }
948 "i" => {
949 let flatness = self.pop_number(operands)?;
950 ContentOperation::SetFlatness(flatness)
951 }
952 "gs" => {
953 let name = self.pop_name(operands)?;
954 ContentOperation::SetGraphicsStateParams(name)
955 }
956
957 "m" => {
959 let y = self.pop_number(operands)?;
960 let x = self.pop_number(operands)?;
961 ContentOperation::MoveTo(x, y)
962 }
963 "l" => {
964 let y = self.pop_number(operands)?;
965 let x = self.pop_number(operands)?;
966 ContentOperation::LineTo(x, y)
967 }
968 "c" => {
969 let y3 = self.pop_number(operands)?;
970 let x3 = self.pop_number(operands)?;
971 let y2 = self.pop_number(operands)?;
972 let x2 = self.pop_number(operands)?;
973 let y1 = self.pop_number(operands)?;
974 let x1 = self.pop_number(operands)?;
975 ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
976 }
977 "v" => {
978 let y3 = self.pop_number(operands)?;
979 let x3 = self.pop_number(operands)?;
980 let y2 = self.pop_number(operands)?;
981 let x2 = self.pop_number(operands)?;
982 ContentOperation::CurveToV(x2, y2, x3, y3)
983 }
984 "y" => {
985 let y3 = self.pop_number(operands)?;
986 let x3 = self.pop_number(operands)?;
987 let y1 = self.pop_number(operands)?;
988 let x1 = self.pop_number(operands)?;
989 ContentOperation::CurveToY(x1, y1, x3, y3)
990 }
991 "h" => ContentOperation::ClosePath,
992 "re" => {
993 let height = self.pop_number(operands)?;
994 let width = self.pop_number(operands)?;
995 let y = self.pop_number(operands)?;
996 let x = self.pop_number(operands)?;
997 ContentOperation::Rectangle(x, y, width, height)
998 }
999
1000 "S" => ContentOperation::Stroke,
1002 "s" => ContentOperation::CloseStroke,
1003 "f" | "F" => ContentOperation::Fill,
1004 "f*" => ContentOperation::FillEvenOdd,
1005 "B" => ContentOperation::FillStroke,
1006 "B*" => ContentOperation::FillStrokeEvenOdd,
1007 "b" => ContentOperation::CloseFillStroke,
1008 "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1009 "n" => ContentOperation::EndPath,
1010
1011 "W" => ContentOperation::Clip,
1013 "W*" => ContentOperation::ClipEvenOdd,
1014
1015 "CS" => {
1017 let name = self.pop_name(operands)?;
1018 ContentOperation::SetStrokingColorSpace(name)
1019 }
1020 "cs" => {
1021 let name = self.pop_name(operands)?;
1022 ContentOperation::SetNonStrokingColorSpace(name)
1023 }
1024 "SC" | "SCN" => {
1025 let components = self.pop_color_components(operands)?;
1026 ContentOperation::SetStrokingColor(components)
1027 }
1028 "sc" | "scn" => {
1029 let components = self.pop_color_components(operands)?;
1030 ContentOperation::SetNonStrokingColor(components)
1031 }
1032 "G" => {
1033 let gray = self.pop_number(operands)?;
1034 ContentOperation::SetStrokingGray(gray)
1035 }
1036 "g" => {
1037 let gray = self.pop_number(operands)?;
1038 ContentOperation::SetNonStrokingGray(gray)
1039 }
1040 "RG" => {
1041 let b = self.pop_number(operands)?;
1042 let g = self.pop_number(operands)?;
1043 let r = self.pop_number(operands)?;
1044 ContentOperation::SetStrokingRGB(r, g, b)
1045 }
1046 "rg" => {
1047 let b = self.pop_number(operands)?;
1048 let g = self.pop_number(operands)?;
1049 let r = self.pop_number(operands)?;
1050 ContentOperation::SetNonStrokingRGB(r, g, b)
1051 }
1052 "K" => {
1053 let k = self.pop_number(operands)?;
1054 let y = self.pop_number(operands)?;
1055 let m = self.pop_number(operands)?;
1056 let c = self.pop_number(operands)?;
1057 ContentOperation::SetStrokingCMYK(c, m, y, k)
1058 }
1059 "k" => {
1060 let k = self.pop_number(operands)?;
1061 let y = self.pop_number(operands)?;
1062 let m = self.pop_number(operands)?;
1063 let c = self.pop_number(operands)?;
1064 ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1065 }
1066
1067 "sh" => {
1069 let name = self.pop_name(operands)?;
1070 ContentOperation::ShadingFill(name)
1071 }
1072
1073 "Do" => {
1075 let name = self.pop_name(operands)?;
1076 ContentOperation::PaintXObject(name)
1077 }
1078
1079 "BMC" => {
1081 let tag = self.pop_name(operands)?;
1082 ContentOperation::BeginMarkedContent(tag)
1083 }
1084 "BDC" => {
1085 let props = self.pop_dict_or_name(operands)?;
1086 let tag = self.pop_name(operands)?;
1087 ContentOperation::BeginMarkedContentWithProps(tag, props)
1088 }
1089 "EMC" => ContentOperation::EndMarkedContent,
1090 "MP" => {
1091 let tag = self.pop_name(operands)?;
1092 ContentOperation::DefineMarkedContentPoint(tag)
1093 }
1094 "DP" => {
1095 let props = self.pop_dict_or_name(operands)?;
1096 let tag = self.pop_name(operands)?;
1097 ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1098 }
1099
1100 "BX" => ContentOperation::BeginCompatibility,
1102 "EX" => ContentOperation::EndCompatibility,
1103
1104 "BI" => {
1106 operands.clear(); self.parse_inline_image()?
1108 }
1109
1110 _ => {
1111 return Err(ParseError::SyntaxError {
1112 position: self.position,
1113 message: format!("Unknown operator: {op}"),
1114 });
1115 }
1116 };
1117
1118 operands.clear(); Ok(operator)
1120 }
1121
1122 fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1124 match operands.pop() {
1125 Some(Token::Number(n)) => Ok(n),
1126 Some(Token::Integer(i)) => Ok(i as f32),
1127 _ => Err(ParseError::SyntaxError {
1128 position: self.position,
1129 message: "Expected number operand".to_string(),
1130 }),
1131 }
1132 }
1133
1134 fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1135 match operands.pop() {
1136 Some(Token::Integer(i)) => Ok(i),
1137 _ => Err(ParseError::SyntaxError {
1138 position: self.position,
1139 message: "Expected integer operand".to_string(),
1140 }),
1141 }
1142 }
1143
1144 fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1145 match operands.pop() {
1146 Some(Token::Name(n)) => Ok(n),
1147 _ => Err(ParseError::SyntaxError {
1148 position: self.position,
1149 message: "Expected name operand".to_string(),
1150 }),
1151 }
1152 }
1153
1154 fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1155 match operands.pop() {
1156 Some(Token::String(s)) => Ok(s),
1157 Some(Token::HexString(s)) => Ok(s),
1158 _ => Err(ParseError::SyntaxError {
1159 position: self.position,
1160 message: "Expected string operand".to_string(),
1161 }),
1162 }
1163 }
1164
1165 fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1166 let mut array = Vec::new();
1167 let mut found_start = false;
1168
1169 while let Some(token) = operands.pop() {
1171 match token {
1172 Token::ArrayStart => {
1173 found_start = true;
1174 break;
1175 }
1176 _ => array.push(token),
1177 }
1178 }
1179
1180 if !found_start {
1181 return Err(ParseError::SyntaxError {
1182 position: self.position,
1183 message: "Expected array".to_string(),
1184 });
1185 }
1186
1187 array.reverse(); Ok(array)
1189 }
1190
1191 fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1192 operands.pop();
1195 Ok(HashMap::new())
1196 }
1197
1198 fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1199 let mut components = Vec::new();
1200
1201 while let Some(token) = operands.last() {
1203 match token {
1204 Token::Number(n) => {
1205 components.push(*n);
1206 operands.pop();
1207 }
1208 Token::Integer(i) => {
1209 components.push(*i as f32);
1210 operands.pop();
1211 }
1212 _ => break,
1213 }
1214 }
1215
1216 components.reverse();
1217 Ok(components)
1218 }
1219
1220 fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1221 let mut elements = Vec::new();
1222
1223 for token in tokens {
1224 match token {
1225 Token::String(s) | Token::HexString(s) => {
1226 elements.push(TextElement::Text(s));
1227 }
1228 Token::Number(n) => {
1229 elements.push(TextElement::Spacing(n));
1230 }
1231 Token::Integer(i) => {
1232 elements.push(TextElement::Spacing(i as f32));
1233 }
1234 _ => {
1235 return Err(ParseError::SyntaxError {
1236 position: self.position,
1237 message: "Invalid element in text array".to_string(),
1238 });
1239 }
1240 }
1241 }
1242
1243 Ok(elements)
1244 }
1245
1246 fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1247 let mut pattern = Vec::new();
1248
1249 for token in tokens {
1250 match token {
1251 Token::Number(n) => pattern.push(n),
1252 Token::Integer(i) => pattern.push(i as f32),
1253 _ => {
1254 return Err(ParseError::SyntaxError {
1255 position: self.position,
1256 message: "Invalid element in dash array".to_string(),
1257 });
1258 }
1259 }
1260 }
1261
1262 Ok(pattern)
1263 }
1264
1265 fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1266 while self.position < self.tokens.len() {
1270 if let Token::Operator(op) = &self.tokens[self.position] {
1271 if op == "EI" {
1272 self.position += 1;
1273 break;
1274 }
1275 }
1276 self.position += 1;
1277 }
1278
1279 Ok(ContentOperation::BeginInlineImage)
1280 }
1281}
1282
1283#[cfg(test)]
1284mod tests {
1285 use super::*;
1286
1287 #[test]
1288 fn test_tokenize_numbers() {
1289 let input = b"123 -45 3.14 -0.5 .5";
1290 let mut tokenizer = ContentTokenizer::new(input);
1291
1292 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(123)));
1293 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(-45)));
1294 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(3.14)));
1295 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1296 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1297 assert_eq!(tokenizer.next_token().unwrap(), None);
1298 }
1299
1300 #[test]
1301 fn test_tokenize_strings() {
1302 let input = b"(Hello World) (Hello\\nWorld) (Nested (paren))";
1303 let mut tokenizer = ContentTokenizer::new(input);
1304
1305 assert_eq!(
1306 tokenizer.next_token().unwrap(),
1307 Some(Token::String(b"Hello World".to_vec()))
1308 );
1309 assert_eq!(
1310 tokenizer.next_token().unwrap(),
1311 Some(Token::String(b"Hello\nWorld".to_vec()))
1312 );
1313 assert_eq!(
1314 tokenizer.next_token().unwrap(),
1315 Some(Token::String(b"Nested (paren)".to_vec()))
1316 );
1317 }
1318
1319 #[test]
1320 fn test_tokenize_hex_strings() {
1321 let input = b"<48656C6C6F> <48 65 6C 6C 6F>";
1322 let mut tokenizer = ContentTokenizer::new(input);
1323
1324 assert_eq!(
1325 tokenizer.next_token().unwrap(),
1326 Some(Token::HexString(b"Hello".to_vec()))
1327 );
1328 assert_eq!(
1329 tokenizer.next_token().unwrap(),
1330 Some(Token::HexString(b"Hello".to_vec()))
1331 );
1332 }
1333
1334 #[test]
1335 fn test_tokenize_names() {
1336 let input = b"/Name /Name#20with#20spaces /A#42C";
1337 let mut tokenizer = ContentTokenizer::new(input);
1338
1339 assert_eq!(
1340 tokenizer.next_token().unwrap(),
1341 Some(Token::Name("Name".to_string()))
1342 );
1343 assert_eq!(
1344 tokenizer.next_token().unwrap(),
1345 Some(Token::Name("Name with spaces".to_string()))
1346 );
1347 assert_eq!(
1348 tokenizer.next_token().unwrap(),
1349 Some(Token::Name("ABC".to_string()))
1350 );
1351 }
1352
1353 #[test]
1354 fn test_tokenize_operators() {
1355 let input = b"BT Tj ET q Q";
1356 let mut tokenizer = ContentTokenizer::new(input);
1357
1358 assert_eq!(
1359 tokenizer.next_token().unwrap(),
1360 Some(Token::Operator("BT".to_string()))
1361 );
1362 assert_eq!(
1363 tokenizer.next_token().unwrap(),
1364 Some(Token::Operator("Tj".to_string()))
1365 );
1366 assert_eq!(
1367 tokenizer.next_token().unwrap(),
1368 Some(Token::Operator("ET".to_string()))
1369 );
1370 assert_eq!(
1371 tokenizer.next_token().unwrap(),
1372 Some(Token::Operator("q".to_string()))
1373 );
1374 assert_eq!(
1375 tokenizer.next_token().unwrap(),
1376 Some(Token::Operator("Q".to_string()))
1377 );
1378 }
1379
1380 #[test]
1381 fn test_parse_text_operators() {
1382 let content = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
1383 let operators = ContentParser::parse(content).unwrap();
1384
1385 assert_eq!(operators.len(), 5);
1386 assert_eq!(operators[0], ContentOperation::BeginText);
1387 assert_eq!(
1388 operators[1],
1389 ContentOperation::SetFont("F1".to_string(), 12.0)
1390 );
1391 assert_eq!(operators[2], ContentOperation::MoveText(100.0, 200.0));
1392 assert_eq!(
1393 operators[3],
1394 ContentOperation::ShowText(b"Hello World".to_vec())
1395 );
1396 assert_eq!(operators[4], ContentOperation::EndText);
1397 }
1398
1399 #[test]
1400 fn test_parse_graphics_operators() {
1401 let content = b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q";
1402 let operators = ContentParser::parse(content).unwrap();
1403
1404 assert_eq!(operators.len(), 6);
1405 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1406 assert_eq!(
1407 operators[1],
1408 ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1409 );
1410 assert_eq!(operators[2], ContentOperation::SetLineWidth(2.0));
1411 assert_eq!(
1412 operators[3],
1413 ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0)
1414 );
1415 assert_eq!(operators[4], ContentOperation::Stroke);
1416 assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
1417 }
1418
1419 #[test]
1420 fn test_parse_color_operators() {
1421 let content = b"0.5 g 1 0 0 rg 0 0 0 1 k";
1422 let operators = ContentParser::parse(content).unwrap();
1423
1424 assert_eq!(operators.len(), 3);
1425 assert_eq!(operators[0], ContentOperation::SetNonStrokingGray(0.5));
1426 assert_eq!(
1427 operators[1],
1428 ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0)
1429 );
1430 assert_eq!(
1431 operators[2],
1432 ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1433 );
1434 }
1435
1436 mod comprehensive_tests {
1438 use super::*;
1439
1440 #[test]
1441 fn test_all_text_operators() {
1442 let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
1444 let operators = ContentParser::parse(content).unwrap();
1445
1446 assert_eq!(operators[0], ContentOperation::BeginText);
1447 assert_eq!(operators[1], ContentOperation::SetCharSpacing(5.0));
1448 assert_eq!(operators[2], ContentOperation::SetWordSpacing(10.0));
1449 assert_eq!(operators[3], ContentOperation::SetHorizontalScaling(120.0));
1450 assert_eq!(operators[4], ContentOperation::SetLeading(15.0));
1451 assert_eq!(
1452 operators[5],
1453 ContentOperation::SetFont("F1".to_string(), 12.0)
1454 );
1455 assert_eq!(operators[6], ContentOperation::SetTextRenderMode(1));
1456 assert_eq!(operators[7], ContentOperation::SetTextRise(5.0));
1457 assert_eq!(operators[8], ContentOperation::MoveText(100.0, 200.0));
1458 assert_eq!(
1459 operators[9],
1460 ContentOperation::MoveTextSetLeading(50.0, 150.0)
1461 );
1462 assert_eq!(operators[10], ContentOperation::NextLine);
1463 assert_eq!(operators[11], ContentOperation::ShowText(b"Hello".to_vec()));
1464 assert_eq!(operators[12], ContentOperation::EndText);
1465 }
1466
1467 #[test]
1468 fn test_all_graphics_state_operators() {
1469 let content = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
1471 let operators = ContentParser::parse(content).unwrap();
1472
1473 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1474 assert_eq!(operators[1], ContentOperation::RestoreGraphicsState);
1475 assert_eq!(
1476 operators[2],
1477 ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1478 );
1479 assert_eq!(operators[3], ContentOperation::SetLineWidth(2.0));
1480 assert_eq!(operators[4], ContentOperation::SetLineCap(1));
1481 assert_eq!(operators[5], ContentOperation::SetLineJoin(2));
1482 assert_eq!(operators[6], ContentOperation::SetMiterLimit(10.0));
1483 assert_eq!(
1484 operators[7],
1485 ContentOperation::SetGraphicsStateParams("GS1".to_string())
1486 );
1487 assert_eq!(operators[8], ContentOperation::SetFlatness(0.5));
1488 assert_eq!(
1489 operators[9],
1490 ContentOperation::SetIntent("Perceptual".to_string())
1491 );
1492 }
1493
1494 #[test]
1495 fn test_all_path_construction_operators() {
1496 let content = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
1497 let operators = ContentParser::parse(content).unwrap();
1498
1499 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
1500 assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
1501 assert_eq!(
1502 operators[2],
1503 ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0)
1504 );
1505 assert_eq!(
1506 operators[3],
1507 ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0)
1508 );
1509 assert_eq!(
1510 operators[4],
1511 ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0)
1512 );
1513 assert_eq!(operators[5], ContentOperation::ClosePath);
1514 assert_eq!(
1515 operators[6],
1516 ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0)
1517 );
1518 }
1519
/// Parses every path-painting and clipping operator and checks the decoded
/// operations in order. Both `f` and `F` are expected to decode to `Fill`.
#[test]
fn test_all_path_painting_operators() {
    let stream = b"S s f F f* B B* b b* n W W*";
    let operators = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        // `F` decodes to the same operation as `f`.
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
        ContentOperation::EndPath,
        ContentOperation::Clip,
        ContentOperation::ClipEvenOdd,
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(operators[idx], *want);
    }
}
1538
/// Parses the colour-space, gray, RGB, CMYK and shading operators and checks
/// the decoded operations in order.
#[test]
fn test_all_color_operators() {
    let stream = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
    let operators = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string()),
        ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string()),
        ContentOperation::SetStrokingGray(0.7),
        ContentOperation::SetNonStrokingGray(0.4),
        ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0),
        ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0),
        ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5),
        ContentOperation::ShadingFill("Shade1".to_string()),
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(operators[idx], *want);
    }
}
1576
/// Parses the XObject (`Do`), marked-content (`BMC`, `EMC`, `MP`) and
/// compatibility (`BX`, `EX`) operators and checks the decoded operations in order.
#[test]
fn test_xobject_and_marked_content_operators() {
    let stream = b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX";
    let operators = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::PaintXObject("Image1".to_string()),
        ContentOperation::BeginMarkedContent("MC1".to_string()),
        ContentOperation::EndMarkedContent,
        ContentOperation::DefineMarkedContentPoint("MP1".to_string()),
        ContentOperation::BeginCompatibility,
        ContentOperation::EndCompatibility,
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(operators[idx], *want);
    }
}
1599
/// Parses a stream that mixes graphics-state and text operators and checks
/// both the operation count and each decoded operation.
#[test]
fn test_complex_content_stream() {
    let stream = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
    let operators = ContentParser::parse(stream).unwrap();

    assert_eq!(operators.len(), 8);

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0),
        ContentOperation::BeginText,
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::MoveText(0.0, 0.0),
        ContentOperation::ShowText(b"Complex".to_vec()),
        ContentOperation::EndText,
        ContentOperation::RestoreGraphicsState,
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(operators[idx], *want);
    }
}
1624
/// Feeds the tokenizer input padded with spaces, tabs, CR and LF and checks
/// that only the real tokens come out, followed by `None` at end of input.
#[test]
fn test_tokenizer_whitespace_handling() {
    let mut tokenizer = ContentTokenizer::new(b"  \t\n\r  BT  \t\n  /F1   12.5  \t  Tf  \n\r  ET  ");

    let expected = [
        Token::Operator("BT".to_string()),
        Token::Name("F1".to_string()),
        Token::Number(12.5),
        Token::Operator("Tf".to_string()),
        Token::Operator("ET".to_string()),
    ];
    for want in &expected {
        assert_eq!(tokenizer.next_token().unwrap().as_ref(), Some(want));
    }

    // The trailing whitespace must not yield any further token.
    assert_eq!(tokenizer.next_token().unwrap(), None);
}
1649
/// Checks numeric tokenization for unusual spellings: leading or trailing
/// decimal point and explicit `+` / `-` signs.
#[test]
fn test_tokenizer_edge_cases() {
    let mut tokenizer = ContentTokenizer::new(b"0 .5 -.5 +.5 123. .123 1.23 -1.23");

    let expected = [
        Token::Integer(0),
        Token::Number(0.5),
        Token::Number(-0.5),
        Token::Number(0.5),
        Token::Number(123.0),
        Token::Number(0.123),
        Token::Number(1.23),
        Token::Number(-1.23),
    ];
    for want in &expected {
        assert_eq!(tokenizer.next_token().unwrap().as_ref(), Some(want));
    }
}
1665
/// Checks how literal-string escape sequences are decoded by the tokenizer.
///
/// Only the first ten strings of the input are asserted; the two trailing
/// `octal` strings are present in the input but not checked here.
#[test]
fn test_string_parsing_edge_cases() {
    let raw = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
    let mut tokenizer = ContentTokenizer::new(raw);

    // Decoded payloads, in input order.
    let expected: [&[u8]; 10] = [
        b"Simple",
        b"With\\backslash",
        b"With)paren",
        b"With\newline",
        b"With\ttab",
        b"With\rcarriage",
        b"With\x08backspace",
        b"With\x0Cformfeed",
        b"With(leftparen",
        b"With)rightparen",
    ];
    for want in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(want.to_vec()))
        );
    }
}
1712
/// Checks hex-string decoding: compact digits, digits separated by spaces,
/// and an odd digit count (the final lone `5` is expected to decode as `0x50`).
#[test]
fn test_hex_string_parsing() {
    let mut tokenizer =
        ContentTokenizer::new(b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>");

    let expected: [&[u8]; 4] = [b"Hello", b"Hello", b"HelloW", b"Hello\x50"];
    for want in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::HexString(want.to_vec()))
        );
    }
}
1735
/// Checks that `#xx` hex escapes inside names are decoded
/// (space, `#`, `/`, and an escaped leading character).
#[test]
fn test_name_parsing_edge_cases() {
    let raw = b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
    let mut tokenizer = ContentTokenizer::new(raw);

    let expected = [
        "Name",
        "Name with spaces",
        "Name#with#hash",
        "Name/with/slash",
        "EmptyName",
    ];
    for want in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Name((*want).to_string()))
        );
    }
}
1762
/// Checks that repeated and nested `q`/`Q` and `BT`/`ET` operators are each
/// emitted as their own operation, in stream order.
#[test]
fn test_operator_parsing_edge_cases() {
    let operators = ContentParser::parse(b"q q q Q Q Q BT BT ET ET").unwrap();

    assert_eq!(operators.len(), 10);

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::BeginText,
        ContentOperation::BeginText,
        ContentOperation::EndText,
        ContentOperation::EndText,
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(operators[idx], *want);
    }
}
1780
/// `Td` preceded by a single operand must be reported as a parse error.
#[test]
fn test_error_handling_insufficient_operands() {
    let outcome = ContentParser::parse(b"100 Td");
    assert!(outcome.is_err());
}
1787
/// An unrecognised operator keyword must be reported as a parse error.
#[test]
fn test_error_handling_invalid_operator() {
    let outcome = ContentParser::parse(b"100 200 INVALID");
    assert!(outcome.is_err());
}
1794
/// Smoke test: tokenizing an unterminated literal string must not panic.
///
/// The tokenizer's result for this input is intentionally left unconstrained
/// (either `Ok` or `Err` is acceptable), so no assertion is made on it. The
/// previous `is_ok() || is_err()` assertion was always true and only looked
/// like a check.
#[test]
fn test_error_handling_malformed_string() {
    let input = b"(Unclosed string";
    let mut tokenizer = ContentTokenizer::new(input);

    // Returning at all (rather than panicking) is the whole check.
    let _ = tokenizer.next_token();
}
1805
/// A hex string containing a non-hex digit (`G`) must be rejected by the tokenizer.
#[test]
fn test_error_handling_malformed_hex_string() {
    let mut tokenizer = ContentTokenizer::new(b"<48656C6C6G>");
    assert!(tokenizer.next_token().is_err());
}
1813
/// A name whose `#` escape is followed by non-hex characters (`#GG`) must be
/// rejected by the tokenizer.
#[test]
fn test_error_handling_malformed_name() {
    let mut tokenizer = ContentTokenizer::new(b"/Name#GG");
    assert!(tokenizer.next_token().is_err());
}
1821
/// An empty stream parses successfully and yields no operations.
#[test]
fn test_empty_content_stream() {
    let operators = ContentParser::parse(b"").unwrap();
    assert!(operators.is_empty());
}
1828
/// A stream consisting solely of whitespace parses successfully and yields
/// no operations.
#[test]
fn test_whitespace_only_content_stream() {
    let operators = ContentParser::parse(b"   \t\n\r   ").unwrap();
    assert!(operators.is_empty());
}
1835
/// Checks that integer operands are delivered as `f32` coordinates.
///
/// NOTE(review): despite the test name, the input contains only integer
/// operands; no real-number operand is exercised here.
#[test]
fn test_mixed_integer_and_real_operands() {
    let operators = ContentParser::parse(b"100 200 m 150 200 l").unwrap();

    assert_eq!(operators.len(), 2);

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(operators[idx], *want);
    }
}
1846
/// Checks that negative integer and negative real operands are parsed for
/// `Td` and `TD`.
#[test]
fn test_negative_operands() {
    let operators = ContentParser::parse(b"-100 -200 Td -50.5 -75.2 TD").unwrap();

    assert_eq!(operators.len(), 2);

    let expected = [
        ContentOperation::MoveText(-100.0, -200.0),
        ContentOperation::MoveTextSetLeading(-50.5, -75.2),
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(operators[idx], *want);
    }
}
1859
/// Checks that operands with many integer and fractional digits parse to the
/// same `f32` values as the equivalent Rust literals.
#[test]
fn test_large_numbers() {
    let operators = ContentParser::parse(b"999999.999999 -999999.999999 m").unwrap();

    let expected = ContentOperation::MoveTo(999999.999999, -999999.999999);

    assert_eq!(operators.len(), 1);
    assert_eq!(operators[0], expected);
}
1871
/// Checks that plain decimal operands are parsed for `m`.
///
/// NOTE(review): despite the test name, the input uses ordinary decimal
/// notation; no exponent form appears in the stream.
#[test]
fn test_scientific_notation() {
    let operators = ContentParser::parse(b"123.45 -456.78 m").unwrap();

    let expected = ContentOperation::MoveTo(123.45, -456.78);

    assert_eq!(operators.len(), 1);
    assert_eq!(operators[0], expected);
}
1881
/// `TJ` preceded by a plain literal string is expected to be reported as a
/// parse error.
#[test]
fn test_show_text_array_complex() {
    let outcome = ContentParser::parse(b"(Hello) TJ");
    assert!(outcome.is_err());
}
1890
/// `d` preceded by a single integer operand is expected to be reported as a
/// parse error.
#[test]
fn test_dash_pattern_empty() {
    let outcome = ContentParser::parse(b"0 d");
    assert!(outcome.is_err());
}
1899
/// `d` preceded by a single real operand is expected to be reported as a
/// parse error.
#[test]
fn test_dash_pattern_complex() {
    let outcome = ContentParser::parse(b"2.5 d");
    assert!(outcome.is_err());
}
1908
/// Checks that a whole `BI … ID … EI` inline-image section produces exactly
/// one operation, `BeginInlineImage`.
#[test]
fn test_inline_image_handling() {
    let stream = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
    let operators = ContentParser::parse(stream).unwrap();

    let expected = ContentOperation::BeginInlineImage;

    assert_eq!(operators.len(), 1);
    assert_eq!(operators[0], expected);
}
1917
/// Coarse performance guard: parsing 1000 `m` operations must finish within
/// 100 ms of wall-clock time.
///
/// The timing assertion reports the measured duration on failure so that a
/// slow-machine failure can be told apart from a real regression.
#[test]
fn test_content_parser_performance() {
    // Build "0 1 m 1 2 m ... 999 1000 m ".
    let mut content = Vec::new();
    for i in 0..1000 {
        content.extend_from_slice(format!("{} {} m ", i, i + 1).as_bytes());
    }

    let start = std::time::Instant::now();
    let operators = ContentParser::parse(&content).unwrap();
    let duration = start.elapsed();

    assert_eq!(operators.len(), 1000);
    assert!(
        duration.as_millis() < 100,
        "parsing 1000 operations took {:?}, expected under 100 ms",
        duration
    );
}
1932
/// Coarse performance guard: tokenizing 2000 integers must finish within
/// 50 ms of wall-clock time.
///
/// The timing assertion reports the measured duration on failure so that a
/// slow-machine failure can be told apart from a real regression.
#[test]
fn test_tokenizer_performance() {
    // Two integers per iteration, so 2000 tokens in total.
    let mut input = Vec::new();
    for i in 0..1000 {
        input.extend_from_slice(format!("{} {} ", i, i + 1).as_bytes());
    }

    let start = std::time::Instant::now();
    let mut tokenizer = ContentTokenizer::new(&input);
    let mut count = 0;
    while tokenizer.next_token().unwrap().is_some() {
        count += 1;
    }
    let duration = start.elapsed();

    assert_eq!(count, 2000);
    assert!(
        duration.as_millis() < 50,
        "tokenizing 2000 tokens took {:?}, expected under 50 ms",
        duration
    );
}
1951
/// Parses a stream of 10000 `c` operators and checks that every decoded
/// operation is a `CurveTo`.
///
/// The per-operation check is wrapped in `assert!`; a bare `matches!`
/// expression only produces a `bool` and would silently discard the result.
#[test]
fn test_memory_usage_large_content() {
    let mut content = Vec::new();
    for i in 0..10000 {
        content.extend_from_slice(
            format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5)
                .as_bytes(),
        );
    }

    let operators = ContentParser::parse(&content).unwrap();
    assert_eq!(operators.len(), 10000);

    for (idx, op) in operators.into_iter().enumerate() {
        assert!(
            matches!(op, ContentOperation::CurveTo(..)),
            "operation #{} is {:?}, expected CurveTo",
            idx,
            op
        );
    }
}
1970
/// Parses the same shared byte buffer from ten threads at once and checks
/// that each thread gets five operations, starting with `BeginText` and
/// ending with `EndText`.
#[test]
fn test_concurrent_parsing() {
    use std::sync::Arc;
    use std::thread;

    let shared = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());

    // Spawn every worker before joining any of them so the parses can overlap.
    let mut workers = Vec::with_capacity(10);
    for _ in 0..10 {
        let stream = Arc::clone(&shared);
        workers.push(thread::spawn(move || ContentParser::parse(&stream).unwrap()));
    }

    for worker in workers {
        let operators = worker.join().unwrap();
        assert_eq!(operators.len(), 5);
        assert_eq!(operators[0], ContentOperation::BeginText);
        assert_eq!(operators[4], ContentOperation::EndText);
    }
}
1991 }
1992}