1use super::{ParseError, ParseResult};
57use std::collections::HashMap;
58
59#[derive(Debug, Clone, PartialEq)]
93pub enum ContentOperation {
94 BeginText,
98
99 EndText,
102
103 SetCharSpacing(f32),
107
108 SetWordSpacing(f32),
111
112 SetHorizontalScaling(f32),
115
116 SetLeading(f32),
119
120 SetFont(String, f32),
123
124 SetTextRenderMode(i32),
127
128 SetTextRise(f32),
131
132 MoveText(f32, f32),
136
137 MoveTextSetLeading(f32, f32),
140
141 SetTextMatrix(f32, f32, f32, f32, f32, f32),
144
145 NextLine,
148
149 ShowText(Vec<u8>),
153
154 ShowTextArray(Vec<TextElement>),
157
158 NextLineShowText(Vec<u8>),
161
162 SetSpacingNextLineShowText(f32, f32, Vec<u8>),
165
166 SaveGraphicsState,
170
171 RestoreGraphicsState,
174
175 SetTransformMatrix(f32, f32, f32, f32, f32, f32),
178
179 SetLineWidth(f32),
181
182 SetLineCap(i32),
185
186 SetLineJoin(i32),
189
190 SetMiterLimit(f32),
193
194 SetDashPattern(Vec<f32>, f32),
197
198 SetIntent(String),
201
202 SetFlatness(f32),
205
206 SetGraphicsStateParams(String),
209
210 MoveTo(f32, f32),
213
214 LineTo(f32, f32),
216
217 CurveTo(f32, f32, f32, f32, f32, f32),
220
221 CurveToV(f32, f32, f32, f32),
223
224 CurveToY(f32, f32, f32, f32),
226
227 ClosePath,
230
231 Rectangle(f32, f32, f32, f32),
234
235 Stroke,
238
239 CloseStroke,
242
243 Fill,
245
246 FillEvenOdd,
248
249 FillStroke,
252
253 FillStrokeEvenOdd,
255
256 CloseFillStroke,
259
260 CloseFillStrokeEvenOdd,
262
263 EndPath,
266
267 Clip, ClipEvenOdd, SetStrokingColorSpace(String),
275
276 SetNonStrokingColorSpace(String),
279
280 SetStrokingColor(Vec<f32>),
283
284 SetNonStrokingColor(Vec<f32>),
287
288 SetStrokingGray(f32),
291
292 SetNonStrokingGray(f32),
294
295 SetStrokingRGB(f32, f32, f32),
298
299 SetNonStrokingRGB(f32, f32, f32),
301
302 SetStrokingCMYK(f32, f32, f32, f32),
304
305 SetNonStrokingCMYK(f32, f32, f32, f32),
307
308 ShadingFill(String), BeginInlineImage, InlineImageData(Vec<u8>), PaintXObject(String),
319
320 BeginMarkedContent(String), BeginMarkedContentWithProps(String, HashMap<String, String>), EndMarkedContent, DefineMarkedContentPoint(String), DefineMarkedContentPointWithProps(String, HashMap<String, String>), BeginCompatibility, EndCompatibility, }
331
/// One element of the array operand of a `TJ` operator.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// A string element, kept as raw bytes.
    Text(Vec<u8>),
    /// A numeric element.
    Spacing(f32),
}
358
359#[derive(Debug, Clone, PartialEq)]
361pub(super) enum Token {
362 Number(f32),
363 Integer(i32),
364 String(Vec<u8>),
365 HexString(Vec<u8>),
366 Name(String),
367 Operator(String),
368 ArrayStart,
369 ArrayEnd,
370 DictStart,
371 DictEnd,
372}
373
/// Splits the raw bytes of a content stream into tokens.
pub struct ContentTokenizer<'a> {
    /// Bytes being tokenized.
    input: &'a [u8],
    /// Index into `input` of the next unread byte.
    position: usize,
}
379
380impl<'a> ContentTokenizer<'a> {
381 pub fn new(input: &'a [u8]) -> Self {
383 Self { input, position: 0 }
384 }
385
386 pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
388 self.skip_whitespace();
389
390 if self.position >= self.input.len() {
391 return Ok(None);
392 }
393
394 let ch = self.input[self.position];
395
396 match ch {
397 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
399
400 b'(' => self.read_literal_string(),
402 b'<' => {
403 if self.peek_next() == Some(b'<') {
404 self.position += 2;
405 Ok(Some(Token::DictStart))
406 } else {
407 self.read_hex_string()
408 }
409 }
410 b'>' => {
411 if self.peek_next() == Some(b'>') {
412 self.position += 2;
413 Ok(Some(Token::DictEnd))
414 } else {
415 Err(ParseError::SyntaxError {
416 position: self.position,
417 message: "Unexpected '>'".to_string(),
418 })
419 }
420 }
421
422 b'[' => {
424 self.position += 1;
425 Ok(Some(Token::ArrayStart))
426 }
427 b']' => {
428 self.position += 1;
429 Ok(Some(Token::ArrayEnd))
430 }
431
432 b'/' => self.read_name(),
434
435 _ => self.read_operator(),
437 }
438 }
439
440 fn skip_whitespace(&mut self) {
441 while self.position < self.input.len() {
442 match self.input[self.position] {
443 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
444 b'%' => self.skip_comment(),
445 _ => break,
446 }
447 }
448 }
449
450 fn skip_comment(&mut self) {
451 while self.position < self.input.len() && self.input[self.position] != b'\n' {
452 self.position += 1;
453 }
454 }
455
456 fn peek_next(&self) -> Option<u8> {
457 if self.position + 1 < self.input.len() {
458 Some(self.input[self.position + 1])
459 } else {
460 None
461 }
462 }
463
464 fn read_number(&mut self) -> ParseResult<Option<Token>> {
465 let start = self.position;
466 let mut has_dot = false;
467
468 if self.position < self.input.len()
470 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
471 {
472 self.position += 1;
473 }
474
475 while self.position < self.input.len() {
477 match self.input[self.position] {
478 b'0'..=b'9' => self.position += 1,
479 b'.' if !has_dot => {
480 has_dot = true;
481 self.position += 1;
482 }
483 _ => break,
484 }
485 }
486
487 let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
488 ParseError::SyntaxError {
489 position: start,
490 message: "Invalid number format".to_string(),
491 }
492 })?;
493
494 if has_dot {
495 let value = num_str
496 .parse::<f32>()
497 .map_err(|_| ParseError::SyntaxError {
498 position: start,
499 message: "Invalid float number".to_string(),
500 })?;
501 Ok(Some(Token::Number(value)))
502 } else {
503 let value = num_str
504 .parse::<i32>()
505 .map_err(|_| ParseError::SyntaxError {
506 position: start,
507 message: "Invalid integer number".to_string(),
508 })?;
509 Ok(Some(Token::Integer(value)))
510 }
511 }
512
513 fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
514 self.position += 1; let mut result = Vec::new();
516 let mut paren_depth = 1;
517 let mut escape = false;
518
519 while self.position < self.input.len() && paren_depth > 0 {
520 let ch = self.input[self.position];
521 self.position += 1;
522
523 if escape {
524 match ch {
525 b'n' => result.push(b'\n'),
526 b'r' => result.push(b'\r'),
527 b't' => result.push(b'\t'),
528 b'b' => result.push(b'\x08'),
529 b'f' => result.push(b'\x0C'),
530 b'(' => result.push(b'('),
531 b')' => result.push(b')'),
532 b'\\' => result.push(b'\\'),
533 b'0'..=b'7' => {
534 self.position -= 1;
536 let octal_value = self.read_octal_escape()?;
537 result.push(octal_value);
538 }
539 _ => result.push(ch), }
541 escape = false;
542 } else {
543 match ch {
544 b'\\' => escape = true,
545 b'(' => {
546 paren_depth += 1;
547 result.push(ch);
548 }
549 b')' => {
550 paren_depth -= 1;
551 if paren_depth > 0 {
552 result.push(ch);
553 }
554 }
555 _ => result.push(ch),
556 }
557 }
558 }
559
560 Ok(Some(Token::String(result)))
561 }
562
563 fn read_octal_escape(&mut self) -> ParseResult<u8> {
564 let mut value = 0u8;
565 let mut count = 0;
566
567 while count < 3 && self.position < self.input.len() {
568 match self.input[self.position] {
569 b'0'..=b'7' => {
570 value = value * 8 + (self.input[self.position] - b'0');
571 self.position += 1;
572 count += 1;
573 }
574 _ => break,
575 }
576 }
577
578 Ok(value)
579 }
580
581 fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
582 self.position += 1; let mut result = Vec::new();
584 let mut nibble = None;
585
586 while self.position < self.input.len() {
587 let ch = self.input[self.position];
588
589 match ch {
590 b'>' => {
591 self.position += 1;
592 if let Some(n) = nibble {
594 result.push(n << 4);
595 }
596 return Ok(Some(Token::HexString(result)));
597 }
598 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
599 let digit = if ch <= b'9' {
600 ch - b'0'
601 } else if ch <= b'F' {
602 ch - b'A' + 10
603 } else {
604 ch - b'a' + 10
605 };
606
607 if let Some(n) = nibble {
608 result.push((n << 4) | digit);
609 nibble = None;
610 } else {
611 nibble = Some(digit);
612 }
613 self.position += 1;
614 }
615 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
616 self.position += 1;
618 }
619 _ => {
620 return Err(ParseError::SyntaxError {
621 position: self.position,
622 message: format!("Invalid character in hex string: {:?}", ch as char),
623 });
624 }
625 }
626 }
627
628 Err(ParseError::SyntaxError {
629 position: self.position,
630 message: "Unterminated hex string".to_string(),
631 })
632 }
633
634 fn read_name(&mut self) -> ParseResult<Option<Token>> {
635 self.position += 1; let start = self.position;
637
638 while self.position < self.input.len() {
639 let ch = self.input[self.position];
640 match ch {
641 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
642 | b']' | b'{' | b'}' | b'/' | b'%' => break,
643 b'#' => {
644 self.position += 1;
646 if self.position + 1 < self.input.len() {
647 self.position += 2;
648 }
649 }
650 _ => self.position += 1,
651 }
652 }
653
654 let name_bytes = &self.input[start..self.position];
655 let name = self.decode_name(name_bytes)?;
656 Ok(Some(Token::Name(name)))
657 }
658
659 fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
660 let mut result = Vec::new();
661 let mut i = 0;
662
663 while i < bytes.len() {
664 if bytes[i] == b'#' && i + 2 < bytes.len() {
665 let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
667 ParseError::SyntaxError {
668 position: self.position,
669 message: "Invalid hex escape in name".to_string(),
670 }
671 })?;
672 let value =
673 u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
674 position: self.position,
675 message: "Invalid hex escape in name".to_string(),
676 })?;
677 result.push(value);
678 i += 3;
679 } else {
680 result.push(bytes[i]);
681 i += 1;
682 }
683 }
684
685 String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
686 position: self.position,
687 message: "Invalid UTF-8 in name".to_string(),
688 })
689 }
690
691 fn read_operator(&mut self) -> ParseResult<Option<Token>> {
692 let start = self.position;
693
694 while self.position < self.input.len() {
695 let ch = self.input[self.position];
696 match ch {
697 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
698 | b']' | b'{' | b'}' | b'/' | b'%' => break,
699 _ => self.position += 1,
700 }
701 }
702
703 let op_bytes = &self.input[start..self.position];
704 let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
705 position: start,
706 message: "Invalid operator".to_string(),
707 })?;
708
709 Ok(Some(Token::Operator(op.to_string())))
710 }
711}
712
713pub struct ContentParser {
732 tokens: Vec<Token>,
733 position: usize,
734}
735
736impl ContentParser {
737 pub fn new(_content: &[u8]) -> Self {
739 Self {
740 tokens: Vec::new(),
741 position: 0,
742 }
743 }
744
745 pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
779 Self::parse_content(content)
780 }
781
782 pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
787 let mut tokenizer = ContentTokenizer::new(content);
788 let mut tokens = Vec::new();
789
790 while let Some(token) = tokenizer.next_token()? {
792 tokens.push(token);
793 }
794
795 let mut parser = Self {
796 tokens,
797 position: 0,
798 };
799
800 parser.parse_operators()
801 }
802
803 fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
804 let mut operators = Vec::new();
805 let mut operand_stack: Vec<Token> = Vec::new();
806
807 while self.position < self.tokens.len() {
808 let token = self.tokens[self.position].clone();
809 self.position += 1;
810
811 match &token {
812 Token::Operator(op) => {
813 let operator = self.parse_operator(op, &mut operand_stack)?;
814 operators.push(operator);
815 }
816 _ => {
817 operand_stack.push(token);
819 }
820 }
821 }
822
823 Ok(operators)
824 }
825
826 fn parse_operator(
827 &mut self,
828 op: &str,
829 operands: &mut Vec<Token>,
830 ) -> ParseResult<ContentOperation> {
831 let operator = match op {
832 "BT" => ContentOperation::BeginText,
834 "ET" => ContentOperation::EndText,
835
836 "Tc" => {
838 let spacing = self.pop_number(operands)?;
839 ContentOperation::SetCharSpacing(spacing)
840 }
841 "Tw" => {
842 let spacing = self.pop_number(operands)?;
843 ContentOperation::SetWordSpacing(spacing)
844 }
845 "Tz" => {
846 let scale = self.pop_number(operands)?;
847 ContentOperation::SetHorizontalScaling(scale)
848 }
849 "TL" => {
850 let leading = self.pop_number(operands)?;
851 ContentOperation::SetLeading(leading)
852 }
853 "Tf" => {
854 let size = self.pop_number(operands)?;
855 let font = self.pop_name(operands)?;
856 ContentOperation::SetFont(font, size)
857 }
858 "Tr" => {
859 let mode = self.pop_integer(operands)?;
860 ContentOperation::SetTextRenderMode(mode)
861 }
862 "Ts" => {
863 let rise = self.pop_number(operands)?;
864 ContentOperation::SetTextRise(rise)
865 }
866
867 "Td" => {
869 let ty = self.pop_number(operands)?;
870 let tx = self.pop_number(operands)?;
871 ContentOperation::MoveText(tx, ty)
872 }
873 "TD" => {
874 let ty = self.pop_number(operands)?;
875 let tx = self.pop_number(operands)?;
876 ContentOperation::MoveTextSetLeading(tx, ty)
877 }
878 "Tm" => {
879 let f = self.pop_number(operands)?;
880 let e = self.pop_number(operands)?;
881 let d = self.pop_number(operands)?;
882 let c = self.pop_number(operands)?;
883 let b = self.pop_number(operands)?;
884 let a = self.pop_number(operands)?;
885 ContentOperation::SetTextMatrix(a, b, c, d, e, f)
886 }
887 "T*" => ContentOperation::NextLine,
888
889 "Tj" => {
891 let text = self.pop_string(operands)?;
892 ContentOperation::ShowText(text)
893 }
894 "TJ" => {
895 let array = self.pop_array(operands)?;
896 let elements = self.parse_text_array(array)?;
897 ContentOperation::ShowTextArray(elements)
898 }
899 "'" => {
900 let text = self.pop_string(operands)?;
901 ContentOperation::NextLineShowText(text)
902 }
903 "\"" => {
904 let text = self.pop_string(operands)?;
905 let aw = self.pop_number(operands)?;
906 let ac = self.pop_number(operands)?;
907 ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
908 }
909
910 "q" => ContentOperation::SaveGraphicsState,
912 "Q" => ContentOperation::RestoreGraphicsState,
913 "cm" => {
914 let f = self.pop_number(operands)?;
915 let e = self.pop_number(operands)?;
916 let d = self.pop_number(operands)?;
917 let c = self.pop_number(operands)?;
918 let b = self.pop_number(operands)?;
919 let a = self.pop_number(operands)?;
920 ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
921 }
922 "w" => {
923 let width = self.pop_number(operands)?;
924 ContentOperation::SetLineWidth(width)
925 }
926 "J" => {
927 let cap = self.pop_integer(operands)?;
928 ContentOperation::SetLineCap(cap)
929 }
930 "j" => {
931 let join = self.pop_integer(operands)?;
932 ContentOperation::SetLineJoin(join)
933 }
934 "M" => {
935 let limit = self.pop_number(operands)?;
936 ContentOperation::SetMiterLimit(limit)
937 }
938 "d" => {
939 let phase = self.pop_number(operands)?;
940 let array = self.pop_array(operands)?;
941 let pattern = self.parse_dash_array(array)?;
942 ContentOperation::SetDashPattern(pattern, phase)
943 }
944 "ri" => {
945 let intent = self.pop_name(operands)?;
946 ContentOperation::SetIntent(intent)
947 }
948 "i" => {
949 let flatness = self.pop_number(operands)?;
950 ContentOperation::SetFlatness(flatness)
951 }
952 "gs" => {
953 let name = self.pop_name(operands)?;
954 ContentOperation::SetGraphicsStateParams(name)
955 }
956
957 "m" => {
959 let y = self.pop_number(operands)?;
960 let x = self.pop_number(operands)?;
961 ContentOperation::MoveTo(x, y)
962 }
963 "l" => {
964 let y = self.pop_number(operands)?;
965 let x = self.pop_number(operands)?;
966 ContentOperation::LineTo(x, y)
967 }
968 "c" => {
969 let y3 = self.pop_number(operands)?;
970 let x3 = self.pop_number(operands)?;
971 let y2 = self.pop_number(operands)?;
972 let x2 = self.pop_number(operands)?;
973 let y1 = self.pop_number(operands)?;
974 let x1 = self.pop_number(operands)?;
975 ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
976 }
977 "v" => {
978 let y3 = self.pop_number(operands)?;
979 let x3 = self.pop_number(operands)?;
980 let y2 = self.pop_number(operands)?;
981 let x2 = self.pop_number(operands)?;
982 ContentOperation::CurveToV(x2, y2, x3, y3)
983 }
984 "y" => {
985 let y3 = self.pop_number(operands)?;
986 let x3 = self.pop_number(operands)?;
987 let y1 = self.pop_number(operands)?;
988 let x1 = self.pop_number(operands)?;
989 ContentOperation::CurveToY(x1, y1, x3, y3)
990 }
991 "h" => ContentOperation::ClosePath,
992 "re" => {
993 let height = self.pop_number(operands)?;
994 let width = self.pop_number(operands)?;
995 let y = self.pop_number(operands)?;
996 let x = self.pop_number(operands)?;
997 ContentOperation::Rectangle(x, y, width, height)
998 }
999
1000 "S" => ContentOperation::Stroke,
1002 "s" => ContentOperation::CloseStroke,
1003 "f" | "F" => ContentOperation::Fill,
1004 "f*" => ContentOperation::FillEvenOdd,
1005 "B" => ContentOperation::FillStroke,
1006 "B*" => ContentOperation::FillStrokeEvenOdd,
1007 "b" => ContentOperation::CloseFillStroke,
1008 "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1009 "n" => ContentOperation::EndPath,
1010
1011 "W" => ContentOperation::Clip,
1013 "W*" => ContentOperation::ClipEvenOdd,
1014
1015 "CS" => {
1017 let name = self.pop_name(operands)?;
1018 ContentOperation::SetStrokingColorSpace(name)
1019 }
1020 "cs" => {
1021 let name = self.pop_name(operands)?;
1022 ContentOperation::SetNonStrokingColorSpace(name)
1023 }
1024 "SC" | "SCN" => {
1025 let components = self.pop_color_components(operands)?;
1026 ContentOperation::SetStrokingColor(components)
1027 }
1028 "sc" | "scn" => {
1029 let components = self.pop_color_components(operands)?;
1030 ContentOperation::SetNonStrokingColor(components)
1031 }
1032 "G" => {
1033 let gray = self.pop_number(operands)?;
1034 ContentOperation::SetStrokingGray(gray)
1035 }
1036 "g" => {
1037 let gray = self.pop_number(operands)?;
1038 ContentOperation::SetNonStrokingGray(gray)
1039 }
1040 "RG" => {
1041 let b = self.pop_number(operands)?;
1042 let g = self.pop_number(operands)?;
1043 let r = self.pop_number(operands)?;
1044 ContentOperation::SetStrokingRGB(r, g, b)
1045 }
1046 "rg" => {
1047 let b = self.pop_number(operands)?;
1048 let g = self.pop_number(operands)?;
1049 let r = self.pop_number(operands)?;
1050 ContentOperation::SetNonStrokingRGB(r, g, b)
1051 }
1052 "K" => {
1053 let k = self.pop_number(operands)?;
1054 let y = self.pop_number(operands)?;
1055 let m = self.pop_number(operands)?;
1056 let c = self.pop_number(operands)?;
1057 ContentOperation::SetStrokingCMYK(c, m, y, k)
1058 }
1059 "k" => {
1060 let k = self.pop_number(operands)?;
1061 let y = self.pop_number(operands)?;
1062 let m = self.pop_number(operands)?;
1063 let c = self.pop_number(operands)?;
1064 ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1065 }
1066
1067 "sh" => {
1069 let name = self.pop_name(operands)?;
1070 ContentOperation::ShadingFill(name)
1071 }
1072
1073 "Do" => {
1075 let name = self.pop_name(operands)?;
1076 ContentOperation::PaintXObject(name)
1077 }
1078
1079 "BMC" => {
1081 let tag = self.pop_name(operands)?;
1082 ContentOperation::BeginMarkedContent(tag)
1083 }
1084 "BDC" => {
1085 let props = self.pop_dict_or_name(operands)?;
1086 let tag = self.pop_name(operands)?;
1087 ContentOperation::BeginMarkedContentWithProps(tag, props)
1088 }
1089 "EMC" => ContentOperation::EndMarkedContent,
1090 "MP" => {
1091 let tag = self.pop_name(operands)?;
1092 ContentOperation::DefineMarkedContentPoint(tag)
1093 }
1094 "DP" => {
1095 let props = self.pop_dict_or_name(operands)?;
1096 let tag = self.pop_name(operands)?;
1097 ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1098 }
1099
1100 "BX" => ContentOperation::BeginCompatibility,
1102 "EX" => ContentOperation::EndCompatibility,
1103
1104 "BI" => {
1106 operands.clear(); self.parse_inline_image()?
1108 }
1109
1110 _ => {
1111 return Err(ParseError::SyntaxError {
1112 position: self.position,
1113 message: format!("Unknown operator: {op}"),
1114 });
1115 }
1116 };
1117
1118 operands.clear(); Ok(operator)
1120 }
1121
1122 fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1124 match operands.pop() {
1125 Some(Token::Number(n)) => Ok(n),
1126 Some(Token::Integer(i)) => Ok(i as f32),
1127 _ => Err(ParseError::SyntaxError {
1128 position: self.position,
1129 message: "Expected number operand".to_string(),
1130 }),
1131 }
1132 }
1133
1134 fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1135 match operands.pop() {
1136 Some(Token::Integer(i)) => Ok(i),
1137 _ => Err(ParseError::SyntaxError {
1138 position: self.position,
1139 message: "Expected integer operand".to_string(),
1140 }),
1141 }
1142 }
1143
1144 fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1145 match operands.pop() {
1146 Some(Token::Name(n)) => Ok(n),
1147 _ => Err(ParseError::SyntaxError {
1148 position: self.position,
1149 message: "Expected name operand".to_string(),
1150 }),
1151 }
1152 }
1153
1154 fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1155 match operands.pop() {
1156 Some(Token::String(s)) => Ok(s),
1157 Some(Token::HexString(s)) => Ok(s),
1158 _ => Err(ParseError::SyntaxError {
1159 position: self.position,
1160 message: "Expected string operand".to_string(),
1161 }),
1162 }
1163 }
1164
1165 fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1166 let mut array = Vec::new();
1167 let mut found_start = false;
1168
1169 while let Some(token) = operands.pop() {
1171 match token {
1172 Token::ArrayStart => {
1173 found_start = true;
1174 break;
1175 }
1176 _ => array.push(token),
1177 }
1178 }
1179
1180 if !found_start {
1181 return Err(ParseError::SyntaxError {
1182 position: self.position,
1183 message: "Expected array".to_string(),
1184 });
1185 }
1186
1187 array.reverse(); Ok(array)
1189 }
1190
1191 fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1192 operands.pop();
1195 Ok(HashMap::new())
1196 }
1197
1198 fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1199 let mut components = Vec::new();
1200
1201 while let Some(token) = operands.last() {
1203 match token {
1204 Token::Number(n) => {
1205 components.push(*n);
1206 operands.pop();
1207 }
1208 Token::Integer(i) => {
1209 components.push(*i as f32);
1210 operands.pop();
1211 }
1212 _ => break,
1213 }
1214 }
1215
1216 components.reverse();
1217 Ok(components)
1218 }
1219
1220 fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1221 let mut elements = Vec::new();
1222
1223 for token in tokens {
1224 match token {
1225 Token::String(s) | Token::HexString(s) => {
1226 elements.push(TextElement::Text(s));
1227 }
1228 Token::Number(n) => {
1229 elements.push(TextElement::Spacing(n));
1230 }
1231 Token::Integer(i) => {
1232 elements.push(TextElement::Spacing(i as f32));
1233 }
1234 _ => {
1235 return Err(ParseError::SyntaxError {
1236 position: self.position,
1237 message: "Invalid element in text array".to_string(),
1238 });
1239 }
1240 }
1241 }
1242
1243 Ok(elements)
1244 }
1245
1246 fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1247 let mut pattern = Vec::new();
1248
1249 for token in tokens {
1250 match token {
1251 Token::Number(n) => pattern.push(n),
1252 Token::Integer(i) => pattern.push(i as f32),
1253 _ => {
1254 return Err(ParseError::SyntaxError {
1255 position: self.position,
1256 message: "Invalid element in dash array".to_string(),
1257 });
1258 }
1259 }
1260 }
1261
1262 Ok(pattern)
1263 }
1264
1265 fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1266 while self.position < self.tokens.len() {
1270 if let Token::Operator(op) = &self.tokens[self.position] {
1271 if op == "EI" {
1272 self.position += 1;
1273 break;
1274 }
1275 }
1276 self.position += 1;
1277 }
1278
1279 Ok(ContentOperation::BeginInlineImage)
1280 }
1281}
1282
1283#[cfg(test)]
1284mod tests {
1285 use super::*;
1286
1287 #[test]
1288 fn test_tokenize_numbers() {
1289 let input = b"123 -45 3.14 -0.5 .5";
1290 let mut tokenizer = ContentTokenizer::new(input);
1291
1292 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(123)));
1293 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(-45)));
1294 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(3.14)));
1295 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1296 assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1297 assert_eq!(tokenizer.next_token().unwrap(), None);
1298 }
1299
1300 #[test]
1301 fn test_tokenize_strings() {
1302 let input = b"(Hello World) (Hello\\nWorld) (Nested (paren))";
1303 let mut tokenizer = ContentTokenizer::new(input);
1304
1305 assert_eq!(
1306 tokenizer.next_token().unwrap(),
1307 Some(Token::String(b"Hello World".to_vec()))
1308 );
1309 assert_eq!(
1310 tokenizer.next_token().unwrap(),
1311 Some(Token::String(b"Hello\nWorld".to_vec()))
1312 );
1313 assert_eq!(
1314 tokenizer.next_token().unwrap(),
1315 Some(Token::String(b"Nested (paren)".to_vec()))
1316 );
1317 }
1318
1319 #[test]
1320 fn test_tokenize_hex_strings() {
1321 let input = b"<48656C6C6F> <48 65 6C 6C 6F>";
1322 let mut tokenizer = ContentTokenizer::new(input);
1323
1324 assert_eq!(
1325 tokenizer.next_token().unwrap(),
1326 Some(Token::HexString(b"Hello".to_vec()))
1327 );
1328 assert_eq!(
1329 tokenizer.next_token().unwrap(),
1330 Some(Token::HexString(b"Hello".to_vec()))
1331 );
1332 }
1333
1334 #[test]
1335 fn test_tokenize_names() {
1336 let input = b"/Name /Name#20with#20spaces /A#42C";
1337 let mut tokenizer = ContentTokenizer::new(input);
1338
1339 assert_eq!(
1340 tokenizer.next_token().unwrap(),
1341 Some(Token::Name("Name".to_string()))
1342 );
1343 assert_eq!(
1344 tokenizer.next_token().unwrap(),
1345 Some(Token::Name("Name with spaces".to_string()))
1346 );
1347 assert_eq!(
1348 tokenizer.next_token().unwrap(),
1349 Some(Token::Name("ABC".to_string()))
1350 );
1351 }
1352
1353 #[test]
1354 fn test_tokenize_operators() {
1355 let input = b"BT Tj ET q Q";
1356 let mut tokenizer = ContentTokenizer::new(input);
1357
1358 assert_eq!(
1359 tokenizer.next_token().unwrap(),
1360 Some(Token::Operator("BT".to_string()))
1361 );
1362 assert_eq!(
1363 tokenizer.next_token().unwrap(),
1364 Some(Token::Operator("Tj".to_string()))
1365 );
1366 assert_eq!(
1367 tokenizer.next_token().unwrap(),
1368 Some(Token::Operator("ET".to_string()))
1369 );
1370 assert_eq!(
1371 tokenizer.next_token().unwrap(),
1372 Some(Token::Operator("q".to_string()))
1373 );
1374 assert_eq!(
1375 tokenizer.next_token().unwrap(),
1376 Some(Token::Operator("Q".to_string()))
1377 );
1378 }
1379
1380 #[test]
1381 fn test_parse_text_operators() {
1382 let content = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
1383 let operators = ContentParser::parse(content).unwrap();
1384
1385 assert_eq!(operators.len(), 5);
1386 assert_eq!(operators[0], ContentOperation::BeginText);
1387 assert_eq!(
1388 operators[1],
1389 ContentOperation::SetFont("F1".to_string(), 12.0)
1390 );
1391 assert_eq!(operators[2], ContentOperation::MoveText(100.0, 200.0));
1392 assert_eq!(
1393 operators[3],
1394 ContentOperation::ShowText(b"Hello World".to_vec())
1395 );
1396 assert_eq!(operators[4], ContentOperation::EndText);
1397 }
1398
1399 #[test]
1400 fn test_parse_graphics_operators() {
1401 let content = b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q";
1402 let operators = ContentParser::parse(content).unwrap();
1403
1404 assert_eq!(operators.len(), 6);
1405 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1406 assert_eq!(
1407 operators[1],
1408 ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1409 );
1410 assert_eq!(operators[2], ContentOperation::SetLineWidth(2.0));
1411 assert_eq!(
1412 operators[3],
1413 ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0)
1414 );
1415 assert_eq!(operators[4], ContentOperation::Stroke);
1416 assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
1417 }
1418
1419 #[test]
1420 fn test_parse_color_operators() {
1421 let content = b"0.5 g 1 0 0 rg 0 0 0 1 k";
1422 let operators = ContentParser::parse(content).unwrap();
1423
1424 assert_eq!(operators.len(), 3);
1425 assert_eq!(operators[0], ContentOperation::SetNonStrokingGray(0.5));
1426 assert_eq!(
1427 operators[1],
1428 ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0)
1429 );
1430 assert_eq!(
1431 operators[2],
1432 ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1433 );
1434 }
1435
1436 mod comprehensive_tests {
1438 use super::*;
1439
1440 #[test]
1441 fn test_all_text_operators() {
1442 let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
1444 let operators = ContentParser::parse(content).unwrap();
1445
1446 assert_eq!(operators[0], ContentOperation::BeginText);
1447 assert_eq!(operators[1], ContentOperation::SetCharSpacing(5.0));
1448 assert_eq!(operators[2], ContentOperation::SetWordSpacing(10.0));
1449 assert_eq!(operators[3], ContentOperation::SetHorizontalScaling(120.0));
1450 assert_eq!(operators[4], ContentOperation::SetLeading(15.0));
1451 assert_eq!(operators[5], ContentOperation::SetFont("F1".to_string(), 12.0));
1452 assert_eq!(operators[6], ContentOperation::SetTextRenderMode(1));
1453 assert_eq!(operators[7], ContentOperation::SetTextRise(5.0));
1454 assert_eq!(operators[8], ContentOperation::MoveText(100.0, 200.0));
1455 assert_eq!(operators[9], ContentOperation::MoveTextSetLeading(50.0, 150.0));
1456 assert_eq!(operators[10], ContentOperation::NextLine);
1457 assert_eq!(operators[11], ContentOperation::ShowText(b"Hello".to_vec()));
1458 assert_eq!(operators[12], ContentOperation::EndText);
1459 }
1460
1461 #[test]
1462 fn test_all_graphics_state_operators() {
1463 let content = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
1465 let operators = ContentParser::parse(content).unwrap();
1466
1467 assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1468 assert_eq!(operators[1], ContentOperation::RestoreGraphicsState);
1469 assert_eq!(operators[2], ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0));
1470 assert_eq!(operators[3], ContentOperation::SetLineWidth(2.0));
1471 assert_eq!(operators[4], ContentOperation::SetLineCap(1));
1472 assert_eq!(operators[5], ContentOperation::SetLineJoin(2));
1473 assert_eq!(operators[6], ContentOperation::SetMiterLimit(10.0));
1474 assert_eq!(operators[7], ContentOperation::SetGraphicsStateParams("GS1".to_string()));
1475 assert_eq!(operators[8], ContentOperation::SetFlatness(0.5));
1476 assert_eq!(operators[9], ContentOperation::SetIntent("Perceptual".to_string()));
1477 }
1478
1479 #[test]
1480 fn test_all_path_construction_operators() {
1481 let content = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
1482 let operators = ContentParser::parse(content).unwrap();
1483
1484 assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
1485 assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
1486 assert_eq!(operators[2], ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0));
1487 assert_eq!(operators[3], ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0));
1488 assert_eq!(operators[4], ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0));
1489 assert_eq!(operators[5], ContentOperation::ClosePath);
1490 assert_eq!(operators[6], ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0));
1491 }
1492
/// Parses every path-painting and clipping operator and checks the operation produced for
/// each one. Both `f` and `F` are expected to yield `Fill`.
#[test]
fn test_all_path_painting_operators() {
    let parsed = ContentParser::parse(b"S s f F f* B B* b b* n W W*").unwrap();

    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
        ContentOperation::EndPath,
        ContentOperation::Clip,
        ContentOperation::ClipEvenOdd,
    ];
    for (index, want) in expected.iter().enumerate() {
        assert_eq!(parsed[index], *want);
    }
}
1511
/// Parses the colour-space, gray, RGB, CMYK and shading operators and checks the operation
/// produced for each one, in stream order.
#[test]
fn test_all_color_operators() {
    let stream = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string()),
        ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string()),
        ContentOperation::SetStrokingGray(0.7),
        ContentOperation::SetNonStrokingGray(0.4),
        ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0),
        ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0),
        ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5),
        ContentOperation::ShadingFill("Shade1".to_string()),
    ];
    for (index, want) in expected.iter().enumerate() {
        assert_eq!(parsed[index], *want);
    }
}
1528
/// Parses the XObject (`Do`), marked-content (`BMC`, `EMC`, `MP`) and compatibility
/// (`BX`, `EX`) operators and checks the operation produced for each one.
#[test]
fn test_xobject_and_marked_content_operators() {
    let parsed = ContentParser::parse(b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX").unwrap();

    let expected = [
        ContentOperation::PaintXObject("Image1".to_string()),
        ContentOperation::BeginMarkedContent("MC1".to_string()),
        ContentOperation::EndMarkedContent,
        ContentOperation::DefineMarkedContentPoint("MP1".to_string()),
        ContentOperation::BeginCompatibility,
        ContentOperation::EndCompatibility,
    ];
    for (index, want) in expected.iter().enumerate() {
        assert_eq!(parsed[index], *want);
    }
}
1542
/// Parses a stream that mixes graphics-state and text operators and checks both the number
/// of operations and their exact sequence.
#[test]
fn test_complex_content_stream() {
    let stream = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = vec![
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0),
        ContentOperation::BeginText,
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::MoveText(0.0, 0.0),
        ContentOperation::ShowText(b"Complex".to_vec()),
        ContentOperation::EndText,
        ContentOperation::RestoreGraphicsState,
    ];

    assert_eq!(parsed.len(), 8);
    assert_eq!(parsed, expected);
}
1558
/// Feeds the tokenizer input padded with spaces, tabs, line feeds and carriage returns and
/// checks that only the five real tokens come out, followed by `None` at end of input.
#[test]
fn test_tokenizer_whitespace_handling() {
    let mut tokenizer = ContentTokenizer::new(b" \t\n\r BT \t\n /F1 12.5 \t Tf \n\r ET ");

    let expected = [
        Token::Operator("BT".to_string()),
        Token::Name("F1".to_string()),
        Token::Number(12.5),
        Token::Operator("Tf".to_string()),
        Token::Operator("ET".to_string()),
    ];
    for want in expected {
        assert_eq!(tokenizer.next_token().unwrap(), Some(want));
    }

    // Trailing whitespace must not produce a further token.
    assert_eq!(tokenizer.next_token().unwrap(), None);
}
1571
/// Checks numeric literal forms: a bare integer, leading-dot and trailing-dot reals, and
/// explicit `+`/`-` signs.
#[test]
fn test_tokenizer_edge_cases() {
    let mut tokenizer = ContentTokenizer::new(b"0 .5 -.5 +.5 123. .123 1.23 -1.23");

    // The only literal without a decimal point is expected to stay an integer.
    assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(0)));

    // Every remaining literal contains a dot and is expected to come back as a real.
    let reals = [0.5, -0.5, 0.5, 123.0, 0.123, 1.23, -1.23];
    for &value in reals.iter() {
        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(value)));
    }
}
1587
/// Checks literal-string escape handling: escaped backslash and parentheses plus the
/// `\n`, `\t`, `\r`, `\b` and `\f` escapes.
///
/// The input ends with two octal-escape strings that are never pulled from the tokenizer,
/// so this test makes no claim about octal handling.
#[test]
fn test_string_parsing_edge_cases() {
    let input = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
    let mut tokenizer = ContentTokenizer::new(input);

    let expected: [&[u8]; 10] = [
        b"Simple",
        b"With\\backslash",
        b"With)paren",
        b"With\newline",
        b"With\ttab",
        b"With\rcarriage",
        b"With\x08backspace",
        b"With\x0Cformfeed",
        b"With(leftparen",
        b"With)rightparen",
    ];
    for &want in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(want.to_vec()))
        );
    }
}
1604
/// Checks hex-string decoding for compact digits, digits separated by spaces, and an odd
/// digit count. In the odd case the final lone `5` is expected to decode as `0x50`.
#[test]
fn test_hex_string_parsing() {
    let mut tokenizer =
        ContentTokenizer::new(b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>");

    let expected: [&[u8]; 4] = [b"Hello", b"Hello", b"HelloW", b"Hello\x50"];
    for &want in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::HexString(want.to_vec()))
        );
    }
}
1615
/// Checks `#xx` hex escapes inside names: `#20` (space), `#23` (`#`), `#2F` (`/`) and an
/// escape in the first position (`#45`, i.e. `E`).
#[test]
fn test_name_parsing_edge_cases() {
    let input = b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
    let mut tokenizer = ContentTokenizer::new(input);

    let expected = [
        "Name",
        "Name with spaces",
        "Name#with#hash",
        "Name/with/slash",
        "EmptyName",
    ];
    for &want in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Name(want.to_string()))
        );
    }
}
1627
/// Checks that repeated operand-less operators (`q`, `Q`, `BT`, `ET`) each yield their own
/// operation, in stream order, including back-to-back `BT BT` and `ET ET`.
#[test]
fn test_operator_parsing_edge_cases() {
    let parsed = ContentParser::parse(b"q q q Q Q Q BT BT ET ET").unwrap();

    let expected = vec![
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::BeginText,
        ContentOperation::BeginText,
        ContentOperation::EndText,
        ContentOperation::EndText,
    ];

    assert_eq!(parsed.len(), 10);
    assert_eq!(parsed, expected);
}
1645
/// `Td` preceded by a single operand is expected to be rejected by the parser.
#[test]
fn test_error_handling_insufficient_operands() {
    assert!(ContentParser::parse(b"100 Td").is_err());
}
1652
/// An operator keyword the parser does not know (`INVALID`) is expected to produce an error.
#[test]
fn test_error_handling_invalid_operator() {
    assert!(ContentParser::parse(b"100 200 INVALID").is_err());
}
1659
/// Smoke test: tokenizing a literal string with no closing parenthesis must not panic.
///
/// The test deliberately does not pin whether the tokenizer reports an error or returns a
/// token for this input; either `Ok` or `Err` is accepted. The previous
/// `assert!(result.is_ok() || result.is_err())` could never fail, so it is replaced by an
/// explicit discard that states the real contract being checked.
#[test]
fn test_error_handling_malformed_string() {
    let input = b"(Unclosed string";
    let mut tokenizer = ContentTokenizer::new(input);

    // Reaching the end of this function without a panic is the whole assertion.
    let _ = tokenizer.next_token();
}
1670
/// A hex string containing a non-hex digit (`G`) is expected to make the tokenizer return
/// an error.
#[test]
fn test_error_handling_malformed_hex_string() {
    let mut tokenizer = ContentTokenizer::new(b"<48656C6C6G>");
    assert!(tokenizer.next_token().is_err());
}
1678
/// A name whose `#` escape is followed by non-hex characters (`#GG`) is expected to make
/// the tokenizer return an error.
#[test]
fn test_error_handling_malformed_name() {
    let mut tokenizer = ContentTokenizer::new(b"/Name#GG");
    assert!(tokenizer.next_token().is_err());
}
1686
/// An empty stream is expected to parse successfully into zero operations.
#[test]
fn test_empty_content_stream() {
    let parsed = ContentParser::parse(b"").unwrap();
    assert!(parsed.is_empty());
}
1693
/// A stream made only of whitespace characters is expected to parse successfully into zero
/// operations.
#[test]
fn test_whitespace_only_content_stream() {
    let parsed = ContentParser::parse(b" \t\n\r ").unwrap();
    assert!(parsed.is_empty());
}
1700
/// Checks that integer operands are accepted where the operation stores floats.
///
/// The stream used here contains integer literals only; the expected operations carry the
/// same values as floats.
#[test]
fn test_mixed_integer_and_real_operands() {
    let parsed = ContentParser::parse(b"100 200 m 150 200 l").unwrap();

    let expected = vec![
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
    ];

    assert_eq!(parsed.len(), 2);
    assert_eq!(parsed, expected);
}
1711
/// Checks that negative integer and negative real operands keep their sign through parsing
/// of `Td` and `TD`.
#[test]
fn test_negative_operands() {
    let parsed = ContentParser::parse(b"-100 -200 Td -50.5 -75.2 TD").unwrap();

    let expected = vec![
        ContentOperation::MoveText(-100.0, -200.0),
        ContentOperation::MoveTextSetLeading(-50.5, -75.2),
    ];

    assert_eq!(parsed.len(), 2);
    assert_eq!(parsed, expected);
}
1721
/// Checks that operands with six integer and six fractional digits parse to the same float
/// values as the equivalent Rust literals.
#[test]
fn test_large_numbers() {
    let parsed = ContentParser::parse(b"999999.999999 -999999.999999 m").unwrap();
    let expected = vec![ContentOperation::MoveTo(999999.999999, -999999.999999)];

    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed, expected);
}
1730
/// Parses a `m` operator with plain decimal operands.
///
/// Despite the test name, the stream contains no exponent-form numbers; only ordinary
/// decimal literals are exercised here.
#[test]
fn test_scientific_notation() {
    let parsed = ContentParser::parse(b"123.45 -456.78 m").unwrap();
    let expected = vec![ContentOperation::MoveTo(123.45, -456.78)];

    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed, expected);
}
1740
/// `TJ` preceded by a plain literal string (no array operand) is expected to be rejected.
#[test]
fn test_show_text_array_complex() {
    assert!(ContentParser::parse(b"(Hello) TJ").is_err());
}
1749
/// `d` preceded only by a single integer (no dash array) is expected to be rejected.
#[test]
fn test_dash_pattern_empty() {
    assert!(ContentParser::parse(b"0 d").is_err());
}
1758
/// `d` preceded only by a single real number (no dash array) is expected to be rejected.
#[test]
fn test_dash_pattern_complex() {
    assert!(ContentParser::parse(b"2.5 d").is_err());
}
1767
/// Parses a `BI … ID … EI` inline-image sequence and expects the whole sequence to come
/// back as a single `BeginInlineImage` operation.
#[test]
fn test_inline_image_handling() {
    let stream = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
    let parsed = ContentParser::parse(stream).unwrap();
    let expected = vec![ContentOperation::BeginInlineImage];

    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed, expected);
}
1776
/// Parses 1000 generated `m` operators and checks both the operation count and that the
/// parse finished in under 100 ms of wall-clock time.
///
/// NOTE(review): the wall-clock bound may be sensitive to build profile and machine load;
/// confirm it is stable on CI.
#[test]
fn test_content_parser_performance() {
    let content: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i + 1).into_bytes())
        .collect();

    // Only the parse itself is timed; building the input happens beforehand.
    let timer = std::time::Instant::now();
    let operators = ContentParser::parse(&content).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(operators.len(), 1000);
    assert!(elapsed < std::time::Duration::from_millis(100));
}
1791
/// Tokenizes 2000 generated integers and checks both the token count and that tokenizing
/// finished in under 50 ms of wall-clock time.
///
/// NOTE(review): the wall-clock bound may be sensitive to build profile and machine load;
/// confirm it is stable on CI.
#[test]
fn test_tokenizer_performance() {
    let input: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} ", i, i + 1).into_bytes())
        .collect();

    // Tokenizer construction is included in the timed region.
    let timer = std::time::Instant::now();
    let mut tokenizer = ContentTokenizer::new(&input);
    // Pull tokens until the tokenizer reports end of input; any error panics via `unwrap`.
    let count = std::iter::from_fn(|| tokenizer.next_token().unwrap()).count();
    let elapsed = timer.elapsed();

    assert_eq!(count, 2000);
    assert!(elapsed < std::time::Duration::from_millis(50));
}
1810
/// Parses a stream of 10 000 generated `c` operators and checks that exactly 10 000
/// operations come back and that every one of them is a `CurveTo`.
///
/// The per-operation check previously evaluated `matches!` and discarded the result, so it
/// could never fail; it is now wrapped in `assert!`.
#[test]
fn test_memory_usage_large_content() {
    let mut content = Vec::new();
    for i in 0..10000 {
        content.extend_from_slice(
            format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5).as_bytes(),
        );
    }

    let operators = ContentParser::parse(&content).unwrap();
    assert_eq!(operators.len(), 10000);

    for (index, op) in operators.iter().enumerate() {
        assert!(
            matches!(op, ContentOperation::CurveTo(..)),
            "operation {} is not a CurveTo: {:?}",
            index,
            op
        );
    }
}
1826
/// Parses the same shared byte buffer from ten threads at once and checks that every
/// thread gets five operations that start with `BeginText` and end with `EndText`.
#[test]
fn test_concurrent_parsing() {
    use std::sync::Arc;
    use std::thread;

    let shared = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());

    // Spawn every worker before joining any of them so the parses can overlap.
    let mut workers = Vec::with_capacity(10);
    for _ in 0..10 {
        let bytes = Arc::clone(&shared);
        workers.push(thread::spawn(move || ContentParser::parse(&bytes).unwrap()));
    }

    for worker in workers {
        let operators = worker.join().unwrap();
        assert_eq!(operators.len(), 5);
        assert_eq!(operators[0], ContentOperation::BeginText);
        assert_eq!(operators[4], ContentOperation::EndText);
    }
}
1849 }
1850}