1use super::{ParseError, ParseResult};
57use std::collections::HashMap;
58
59#[derive(Debug, Clone, PartialEq)]
93pub enum ContentOperation {
94 BeginText,
98
99 EndText,
102
103 SetCharSpacing(f32),
107
108 SetWordSpacing(f32),
111
112 SetHorizontalScaling(f32),
115
116 SetLeading(f32),
119
120 SetFont(String, f32),
123
124 SetTextRenderMode(i32),
127
128 SetTextRise(f32),
131
132 MoveText(f32, f32),
136
137 MoveTextSetLeading(f32, f32),
140
141 SetTextMatrix(f32, f32, f32, f32, f32, f32),
144
145 NextLine,
148
149 ShowText(Vec<u8>),
153
154 ShowTextArray(Vec<TextElement>),
157
158 NextLineShowText(Vec<u8>),
161
162 SetSpacingNextLineShowText(f32, f32, Vec<u8>),
165
166 SaveGraphicsState,
170
171 RestoreGraphicsState,
174
175 SetTransformMatrix(f32, f32, f32, f32, f32, f32),
178
179 SetLineWidth(f32),
181
182 SetLineCap(i32),
185
186 SetLineJoin(i32),
189
190 SetMiterLimit(f32),
193
194 SetDashPattern(Vec<f32>, f32),
197
198 SetIntent(String),
201
202 SetFlatness(f32),
205
206 SetGraphicsStateParams(String),
209
210 MoveTo(f32, f32),
213
214 LineTo(f32, f32),
216
217 CurveTo(f32, f32, f32, f32, f32, f32),
220
221 CurveToV(f32, f32, f32, f32),
223
224 CurveToY(f32, f32, f32, f32),
226
227 ClosePath,
230
231 Rectangle(f32, f32, f32, f32),
234
235 Stroke,
238
239 CloseStroke,
242
243 Fill,
245
246 FillEvenOdd,
248
249 FillStroke,
252
253 FillStrokeEvenOdd,
255
256 CloseFillStroke,
259
260 CloseFillStrokeEvenOdd,
262
263 EndPath,
266
267 Clip, ClipEvenOdd, SetStrokingColorSpace(String),
275
276 SetNonStrokingColorSpace(String),
279
280 SetStrokingColor(Vec<f32>),
283
284 SetNonStrokingColor(Vec<f32>),
287
288 SetStrokingGray(f32),
291
292 SetNonStrokingGray(f32),
294
295 SetStrokingRGB(f32, f32, f32),
298
299 SetNonStrokingRGB(f32, f32, f32),
301
302 SetStrokingCMYK(f32, f32, f32, f32),
304
305 SetNonStrokingCMYK(f32, f32, f32, f32),
307
308 ShadingFill(String), BeginInlineImage, InlineImageData(Vec<u8>), PaintXObject(String),
319
320 BeginMarkedContent(String), BeginMarkedContentWithProps(String, HashMap<String, String>), EndMarkedContent, DefineMarkedContentPoint(String), DefineMarkedContentPointWithProps(String, HashMap<String, String>), BeginCompatibility, EndCompatibility, }
331
/// One entry of the array operand of a `TJ` operator.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// Bytes of a literal or hexadecimal string entry.
    Text(Vec<u8>),

    /// A numeric entry (integers are widened to `f32`).
    Spacing(f32),
}
358
359#[derive(Debug, Clone, PartialEq)]
361pub(super) enum Token {
362 Number(f32),
363 Integer(i32),
364 String(Vec<u8>),
365 HexString(Vec<u8>),
366 Name(String),
367 Operator(String),
368 ArrayStart,
369 ArrayEnd,
370 DictStart,
371 DictEnd,
372}
373
/// Byte-level tokenizer over a content stream.
///
/// The tokenizer borrows its input and keeps a cursor into it; it never copies
/// the stream. `Debug` and `Clone` are derived so the public type can be
/// inspected in diagnostics and so a cursor can be snapshotted cheaply (the
/// clone shares the same borrowed input).
#[derive(Debug, Clone)]
pub struct ContentTokenizer<'a> {
    /// The complete input being tokenized.
    input: &'a [u8],
    /// Index of the next unread byte in `input`.
    position: usize,
}
379
380impl<'a> ContentTokenizer<'a> {
381 pub fn new(input: &'a [u8]) -> Self {
383 Self { input, position: 0 }
384 }
385
386 pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
388 self.skip_whitespace();
389
390 if self.position >= self.input.len() {
391 return Ok(None);
392 }
393
394 let ch = self.input[self.position];
395
396 match ch {
397 b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
399
400 b'(' => self.read_literal_string(),
402 b'<' => {
403 if self.peek_next() == Some(b'<') {
404 self.position += 2;
405 Ok(Some(Token::DictStart))
406 } else {
407 self.read_hex_string()
408 }
409 }
410 b'>' => {
411 if self.peek_next() == Some(b'>') {
412 self.position += 2;
413 Ok(Some(Token::DictEnd))
414 } else {
415 Err(ParseError::SyntaxError {
416 position: self.position,
417 message: "Unexpected '>'".to_string(),
418 })
419 }
420 }
421
422 b'[' => {
424 self.position += 1;
425 Ok(Some(Token::ArrayStart))
426 }
427 b']' => {
428 self.position += 1;
429 Ok(Some(Token::ArrayEnd))
430 }
431
432 b'/' => self.read_name(),
434
435 _ => self.read_operator(),
437 }
438 }
439
440 fn skip_whitespace(&mut self) {
441 while self.position < self.input.len() {
442 match self.input[self.position] {
443 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
444 b'%' => self.skip_comment(),
445 _ => break,
446 }
447 }
448 }
449
450 fn skip_comment(&mut self) {
451 while self.position < self.input.len() && self.input[self.position] != b'\n' {
452 self.position += 1;
453 }
454 }
455
456 fn peek_next(&self) -> Option<u8> {
457 if self.position + 1 < self.input.len() {
458 Some(self.input[self.position + 1])
459 } else {
460 None
461 }
462 }
463
464 fn read_number(&mut self) -> ParseResult<Option<Token>> {
465 let start = self.position;
466 let mut has_dot = false;
467
468 if self.position < self.input.len()
470 && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
471 {
472 self.position += 1;
473 }
474
475 while self.position < self.input.len() {
477 match self.input[self.position] {
478 b'0'..=b'9' => self.position += 1,
479 b'.' if !has_dot => {
480 has_dot = true;
481 self.position += 1;
482 }
483 _ => break,
484 }
485 }
486
487 let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
488 ParseError::SyntaxError {
489 position: start,
490 message: "Invalid number format".to_string(),
491 }
492 })?;
493
494 if has_dot {
495 let value = num_str
496 .parse::<f32>()
497 .map_err(|_| ParseError::SyntaxError {
498 position: start,
499 message: "Invalid float number".to_string(),
500 })?;
501 Ok(Some(Token::Number(value)))
502 } else {
503 let value = num_str
504 .parse::<i32>()
505 .map_err(|_| ParseError::SyntaxError {
506 position: start,
507 message: "Invalid integer number".to_string(),
508 })?;
509 Ok(Some(Token::Integer(value)))
510 }
511 }
512
513 fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
514 self.position += 1; let mut result = Vec::new();
516 let mut paren_depth = 1;
517 let mut escape = false;
518
519 while self.position < self.input.len() && paren_depth > 0 {
520 let ch = self.input[self.position];
521 self.position += 1;
522
523 if escape {
524 match ch {
525 b'n' => result.push(b'\n'),
526 b'r' => result.push(b'\r'),
527 b't' => result.push(b'\t'),
528 b'b' => result.push(b'\x08'),
529 b'f' => result.push(b'\x0C'),
530 b'(' => result.push(b'('),
531 b')' => result.push(b')'),
532 b'\\' => result.push(b'\\'),
533 b'0'..=b'7' => {
534 self.position -= 1;
536 let octal_value = self.read_octal_escape()?;
537 result.push(octal_value);
538 }
539 _ => result.push(ch), }
541 escape = false;
542 } else {
543 match ch {
544 b'\\' => escape = true,
545 b'(' => {
546 paren_depth += 1;
547 result.push(ch);
548 }
549 b')' => {
550 paren_depth -= 1;
551 if paren_depth > 0 {
552 result.push(ch);
553 }
554 }
555 _ => result.push(ch),
556 }
557 }
558 }
559
560 Ok(Some(Token::String(result)))
561 }
562
563 fn read_octal_escape(&mut self) -> ParseResult<u8> {
564 let mut value = 0u8;
565 let mut count = 0;
566
567 while count < 3 && self.position < self.input.len() {
568 match self.input[self.position] {
569 b'0'..=b'7' => {
570 value = value * 8 + (self.input[self.position] - b'0');
571 self.position += 1;
572 count += 1;
573 }
574 _ => break,
575 }
576 }
577
578 Ok(value)
579 }
580
581 fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
582 self.position += 1; let mut result = Vec::new();
584 let mut nibble = None;
585
586 while self.position < self.input.len() {
587 let ch = self.input[self.position];
588
589 match ch {
590 b'>' => {
591 self.position += 1;
592 if let Some(n) = nibble {
594 result.push(n << 4);
595 }
596 return Ok(Some(Token::HexString(result)));
597 }
598 b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
599 let digit = if ch <= b'9' {
600 ch - b'0'
601 } else if ch <= b'F' {
602 ch - b'A' + 10
603 } else {
604 ch - b'a' + 10
605 };
606
607 if let Some(n) = nibble {
608 result.push((n << 4) | digit);
609 nibble = None;
610 } else {
611 nibble = Some(digit);
612 }
613 self.position += 1;
614 }
615 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
616 self.position += 1;
618 }
619 _ => {
620 return Err(ParseError::SyntaxError {
621 position: self.position,
622 message: format!("Invalid character in hex string: {:?}", ch as char),
623 });
624 }
625 }
626 }
627
628 Err(ParseError::SyntaxError {
629 position: self.position,
630 message: "Unterminated hex string".to_string(),
631 })
632 }
633
634 fn read_name(&mut self) -> ParseResult<Option<Token>> {
635 self.position += 1; let start = self.position;
637
638 while self.position < self.input.len() {
639 let ch = self.input[self.position];
640 match ch {
641 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
642 | b']' | b'{' | b'}' | b'/' | b'%' => break,
643 b'#' => {
644 self.position += 1;
646 if self.position + 1 < self.input.len() {
647 self.position += 2;
648 }
649 }
650 _ => self.position += 1,
651 }
652 }
653
654 let name_bytes = &self.input[start..self.position];
655 let name = self.decode_name(name_bytes)?;
656 Ok(Some(Token::Name(name)))
657 }
658
659 fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
660 let mut result = Vec::new();
661 let mut i = 0;
662
663 while i < bytes.len() {
664 if bytes[i] == b'#' && i + 2 < bytes.len() {
665 let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
667 ParseError::SyntaxError {
668 position: self.position,
669 message: "Invalid hex escape in name".to_string(),
670 }
671 })?;
672 let value =
673 u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
674 position: self.position,
675 message: "Invalid hex escape in name".to_string(),
676 })?;
677 result.push(value);
678 i += 3;
679 } else {
680 result.push(bytes[i]);
681 i += 1;
682 }
683 }
684
685 String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
686 position: self.position,
687 message: "Invalid UTF-8 in name".to_string(),
688 })
689 }
690
691 fn read_operator(&mut self) -> ParseResult<Option<Token>> {
692 let start = self.position;
693
694 while self.position < self.input.len() {
695 let ch = self.input[self.position];
696 match ch {
697 b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
698 | b']' | b'{' | b'}' | b'/' | b'%' => break,
699 _ => self.position += 1,
700 }
701 }
702
703 let op_bytes = &self.input[start..self.position];
704 let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
705 position: start,
706 message: "Invalid operator".to_string(),
707 })?;
708
709 Ok(Some(Token::Operator(op.to_string())))
710 }
711}
712
713pub struct ContentParser {
732 tokens: Vec<Token>,
733 position: usize,
734}
735
736impl ContentParser {
737 pub fn new(_content: &[u8]) -> Self {
739 Self {
740 tokens: Vec::new(),
741 position: 0,
742 }
743 }
744
745 pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
779 Self::parse_content(content)
780 }
781
782 pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
787 let mut tokenizer = ContentTokenizer::new(content);
788 let mut tokens = Vec::new();
789
790 while let Some(token) = tokenizer.next_token()? {
792 tokens.push(token);
793 }
794
795 let mut parser = Self {
796 tokens,
797 position: 0,
798 };
799
800 parser.parse_operators()
801 }
802
803 fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
804 let mut operators = Vec::new();
805 let mut operand_stack: Vec<Token> = Vec::new();
806
807 while self.position < self.tokens.len() {
808 let token = self.tokens[self.position].clone();
809 self.position += 1;
810
811 match &token {
812 Token::Operator(op) => {
813 let operator = self.parse_operator(op, &mut operand_stack)?;
814 operators.push(operator);
815 }
816 _ => {
817 operand_stack.push(token);
819 }
820 }
821 }
822
823 Ok(operators)
824 }
825
826 fn parse_operator(
827 &mut self,
828 op: &str,
829 operands: &mut Vec<Token>,
830 ) -> ParseResult<ContentOperation> {
831 let operator = match op {
832 "BT" => ContentOperation::BeginText,
834 "ET" => ContentOperation::EndText,
835
836 "Tc" => {
838 let spacing = self.pop_number(operands)?;
839 ContentOperation::SetCharSpacing(spacing)
840 }
841 "Tw" => {
842 let spacing = self.pop_number(operands)?;
843 ContentOperation::SetWordSpacing(spacing)
844 }
845 "Tz" => {
846 let scale = self.pop_number(operands)?;
847 ContentOperation::SetHorizontalScaling(scale)
848 }
849 "TL" => {
850 let leading = self.pop_number(operands)?;
851 ContentOperation::SetLeading(leading)
852 }
853 "Tf" => {
854 let size = self.pop_number(operands)?;
855 let font = self.pop_name(operands)?;
856 ContentOperation::SetFont(font, size)
857 }
858 "Tr" => {
859 let mode = self.pop_integer(operands)?;
860 ContentOperation::SetTextRenderMode(mode)
861 }
862 "Ts" => {
863 let rise = self.pop_number(operands)?;
864 ContentOperation::SetTextRise(rise)
865 }
866
867 "Td" => {
869 let ty = self.pop_number(operands)?;
870 let tx = self.pop_number(operands)?;
871 ContentOperation::MoveText(tx, ty)
872 }
873 "TD" => {
874 let ty = self.pop_number(operands)?;
875 let tx = self.pop_number(operands)?;
876 ContentOperation::MoveTextSetLeading(tx, ty)
877 }
878 "Tm" => {
879 let f = self.pop_number(operands)?;
880 let e = self.pop_number(operands)?;
881 let d = self.pop_number(operands)?;
882 let c = self.pop_number(operands)?;
883 let b = self.pop_number(operands)?;
884 let a = self.pop_number(operands)?;
885 ContentOperation::SetTextMatrix(a, b, c, d, e, f)
886 }
887 "T*" => ContentOperation::NextLine,
888
889 "Tj" => {
891 let text = self.pop_string(operands)?;
892 ContentOperation::ShowText(text)
893 }
894 "TJ" => {
895 let array = self.pop_array(operands)?;
896 let elements = self.parse_text_array(array)?;
897 ContentOperation::ShowTextArray(elements)
898 }
899 "'" => {
900 let text = self.pop_string(operands)?;
901 ContentOperation::NextLineShowText(text)
902 }
903 "\"" => {
904 let text = self.pop_string(operands)?;
905 let aw = self.pop_number(operands)?;
906 let ac = self.pop_number(operands)?;
907 ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
908 }
909
910 "q" => ContentOperation::SaveGraphicsState,
912 "Q" => ContentOperation::RestoreGraphicsState,
913 "cm" => {
914 let f = self.pop_number(operands)?;
915 let e = self.pop_number(operands)?;
916 let d = self.pop_number(operands)?;
917 let c = self.pop_number(operands)?;
918 let b = self.pop_number(operands)?;
919 let a = self.pop_number(operands)?;
920 ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
921 }
922 "w" => {
923 let width = self.pop_number(operands)?;
924 ContentOperation::SetLineWidth(width)
925 }
926 "J" => {
927 let cap = self.pop_integer(operands)?;
928 ContentOperation::SetLineCap(cap)
929 }
930 "j" => {
931 let join = self.pop_integer(operands)?;
932 ContentOperation::SetLineJoin(join)
933 }
934 "M" => {
935 let limit = self.pop_number(operands)?;
936 ContentOperation::SetMiterLimit(limit)
937 }
938 "d" => {
939 let phase = self.pop_number(operands)?;
940 let array = self.pop_array(operands)?;
941 let pattern = self.parse_dash_array(array)?;
942 ContentOperation::SetDashPattern(pattern, phase)
943 }
944 "ri" => {
945 let intent = self.pop_name(operands)?;
946 ContentOperation::SetIntent(intent)
947 }
948 "i" => {
949 let flatness = self.pop_number(operands)?;
950 ContentOperation::SetFlatness(flatness)
951 }
952 "gs" => {
953 let name = self.pop_name(operands)?;
954 ContentOperation::SetGraphicsStateParams(name)
955 }
956
957 "m" => {
959 let y = self.pop_number(operands)?;
960 let x = self.pop_number(operands)?;
961 ContentOperation::MoveTo(x, y)
962 }
963 "l" => {
964 let y = self.pop_number(operands)?;
965 let x = self.pop_number(operands)?;
966 ContentOperation::LineTo(x, y)
967 }
968 "c" => {
969 let y3 = self.pop_number(operands)?;
970 let x3 = self.pop_number(operands)?;
971 let y2 = self.pop_number(operands)?;
972 let x2 = self.pop_number(operands)?;
973 let y1 = self.pop_number(operands)?;
974 let x1 = self.pop_number(operands)?;
975 ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
976 }
977 "v" => {
978 let y3 = self.pop_number(operands)?;
979 let x3 = self.pop_number(operands)?;
980 let y2 = self.pop_number(operands)?;
981 let x2 = self.pop_number(operands)?;
982 ContentOperation::CurveToV(x2, y2, x3, y3)
983 }
984 "y" => {
985 let y3 = self.pop_number(operands)?;
986 let x3 = self.pop_number(operands)?;
987 let y1 = self.pop_number(operands)?;
988 let x1 = self.pop_number(operands)?;
989 ContentOperation::CurveToY(x1, y1, x3, y3)
990 }
991 "h" => ContentOperation::ClosePath,
992 "re" => {
993 let height = self.pop_number(operands)?;
994 let width = self.pop_number(operands)?;
995 let y = self.pop_number(operands)?;
996 let x = self.pop_number(operands)?;
997 ContentOperation::Rectangle(x, y, width, height)
998 }
999
1000 "S" => ContentOperation::Stroke,
1002 "s" => ContentOperation::CloseStroke,
1003 "f" | "F" => ContentOperation::Fill,
1004 "f*" => ContentOperation::FillEvenOdd,
1005 "B" => ContentOperation::FillStroke,
1006 "B*" => ContentOperation::FillStrokeEvenOdd,
1007 "b" => ContentOperation::CloseFillStroke,
1008 "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1009 "n" => ContentOperation::EndPath,
1010
1011 "W" => ContentOperation::Clip,
1013 "W*" => ContentOperation::ClipEvenOdd,
1014
1015 "CS" => {
1017 let name = self.pop_name(operands)?;
1018 ContentOperation::SetStrokingColorSpace(name)
1019 }
1020 "cs" => {
1021 let name = self.pop_name(operands)?;
1022 ContentOperation::SetNonStrokingColorSpace(name)
1023 }
1024 "SC" | "SCN" => {
1025 let components = self.pop_color_components(operands)?;
1026 ContentOperation::SetStrokingColor(components)
1027 }
1028 "sc" | "scn" => {
1029 let components = self.pop_color_components(operands)?;
1030 ContentOperation::SetNonStrokingColor(components)
1031 }
1032 "G" => {
1033 let gray = self.pop_number(operands)?;
1034 ContentOperation::SetStrokingGray(gray)
1035 }
1036 "g" => {
1037 let gray = self.pop_number(operands)?;
1038 ContentOperation::SetNonStrokingGray(gray)
1039 }
1040 "RG" => {
1041 let b = self.pop_number(operands)?;
1042 let g = self.pop_number(operands)?;
1043 let r = self.pop_number(operands)?;
1044 ContentOperation::SetStrokingRGB(r, g, b)
1045 }
1046 "rg" => {
1047 let b = self.pop_number(operands)?;
1048 let g = self.pop_number(operands)?;
1049 let r = self.pop_number(operands)?;
1050 ContentOperation::SetNonStrokingRGB(r, g, b)
1051 }
1052 "K" => {
1053 let k = self.pop_number(operands)?;
1054 let y = self.pop_number(operands)?;
1055 let m = self.pop_number(operands)?;
1056 let c = self.pop_number(operands)?;
1057 ContentOperation::SetStrokingCMYK(c, m, y, k)
1058 }
1059 "k" => {
1060 let k = self.pop_number(operands)?;
1061 let y = self.pop_number(operands)?;
1062 let m = self.pop_number(operands)?;
1063 let c = self.pop_number(operands)?;
1064 ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1065 }
1066
1067 "sh" => {
1069 let name = self.pop_name(operands)?;
1070 ContentOperation::ShadingFill(name)
1071 }
1072
1073 "Do" => {
1075 let name = self.pop_name(operands)?;
1076 ContentOperation::PaintXObject(name)
1077 }
1078
1079 "BMC" => {
1081 let tag = self.pop_name(operands)?;
1082 ContentOperation::BeginMarkedContent(tag)
1083 }
1084 "BDC" => {
1085 let props = self.pop_dict_or_name(operands)?;
1086 let tag = self.pop_name(operands)?;
1087 ContentOperation::BeginMarkedContentWithProps(tag, props)
1088 }
1089 "EMC" => ContentOperation::EndMarkedContent,
1090 "MP" => {
1091 let tag = self.pop_name(operands)?;
1092 ContentOperation::DefineMarkedContentPoint(tag)
1093 }
1094 "DP" => {
1095 let props = self.pop_dict_or_name(operands)?;
1096 let tag = self.pop_name(operands)?;
1097 ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1098 }
1099
1100 "BX" => ContentOperation::BeginCompatibility,
1102 "EX" => ContentOperation::EndCompatibility,
1103
1104 "BI" => {
1106 operands.clear(); self.parse_inline_image()?
1108 }
1109
1110 _ => {
1111 return Err(ParseError::SyntaxError {
1112 position: self.position,
1113 message: format!("Unknown operator: {op}"),
1114 });
1115 }
1116 };
1117
1118 operands.clear(); Ok(operator)
1120 }
1121
1122 fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1124 match operands.pop() {
1125 Some(Token::Number(n)) => Ok(n),
1126 Some(Token::Integer(i)) => Ok(i as f32),
1127 _ => Err(ParseError::SyntaxError {
1128 position: self.position,
1129 message: "Expected number operand".to_string(),
1130 }),
1131 }
1132 }
1133
1134 fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1135 match operands.pop() {
1136 Some(Token::Integer(i)) => Ok(i),
1137 _ => Err(ParseError::SyntaxError {
1138 position: self.position,
1139 message: "Expected integer operand".to_string(),
1140 }),
1141 }
1142 }
1143
1144 fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1145 match operands.pop() {
1146 Some(Token::Name(n)) => Ok(n),
1147 _ => Err(ParseError::SyntaxError {
1148 position: self.position,
1149 message: "Expected name operand".to_string(),
1150 }),
1151 }
1152 }
1153
1154 fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1155 match operands.pop() {
1156 Some(Token::String(s)) => Ok(s),
1157 Some(Token::HexString(s)) => Ok(s),
1158 _ => Err(ParseError::SyntaxError {
1159 position: self.position,
1160 message: "Expected string operand".to_string(),
1161 }),
1162 }
1163 }
1164
1165 fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1166 let mut array = Vec::new();
1167 let mut found_start = false;
1168
1169 while let Some(token) = operands.pop() {
1171 match token {
1172 Token::ArrayStart => {
1173 found_start = true;
1174 break;
1175 }
1176 _ => array.push(token),
1177 }
1178 }
1179
1180 if !found_start {
1181 return Err(ParseError::SyntaxError {
1182 position: self.position,
1183 message: "Expected array".to_string(),
1184 });
1185 }
1186
1187 array.reverse(); Ok(array)
1189 }
1190
1191 fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1192 operands.pop();
1195 Ok(HashMap::new())
1196 }
1197
1198 fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1199 let mut components = Vec::new();
1200
1201 while let Some(token) = operands.last() {
1203 match token {
1204 Token::Number(n) => {
1205 components.push(*n);
1206 operands.pop();
1207 }
1208 Token::Integer(i) => {
1209 components.push(*i as f32);
1210 operands.pop();
1211 }
1212 _ => break,
1213 }
1214 }
1215
1216 components.reverse();
1217 Ok(components)
1218 }
1219
1220 fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1221 let mut elements = Vec::new();
1222
1223 for token in tokens {
1224 match token {
1225 Token::String(s) | Token::HexString(s) => {
1226 elements.push(TextElement::Text(s));
1227 }
1228 Token::Number(n) => {
1229 elements.push(TextElement::Spacing(n));
1230 }
1231 Token::Integer(i) => {
1232 elements.push(TextElement::Spacing(i as f32));
1233 }
1234 _ => {
1235 return Err(ParseError::SyntaxError {
1236 position: self.position,
1237 message: "Invalid element in text array".to_string(),
1238 });
1239 }
1240 }
1241 }
1242
1243 Ok(elements)
1244 }
1245
1246 fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1247 let mut pattern = Vec::new();
1248
1249 for token in tokens {
1250 match token {
1251 Token::Number(n) => pattern.push(n),
1252 Token::Integer(i) => pattern.push(i as f32),
1253 _ => {
1254 return Err(ParseError::SyntaxError {
1255 position: self.position,
1256 message: "Invalid element in dash array".to_string(),
1257 });
1258 }
1259 }
1260 }
1261
1262 Ok(pattern)
1263 }
1264
1265 fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1266 while self.position < self.tokens.len() {
1270 if let Token::Operator(op) = &self.tokens[self.position] {
1271 if op == "EI" {
1272 self.position += 1;
1273 break;
1274 }
1275 }
1276 self.position += 1;
1277 }
1278
1279 Ok(ContentOperation::BeginInlineImage)
1280 }
1281}
1282
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh tokenizer over `input` until it reports the end of input,
    /// panicking on any tokenizer error.
    fn tokenize(input: &[u8]) -> Vec<Token> {
        let mut tokenizer = ContentTokenizer::new(input);
        let mut tokens = Vec::new();
        while let Some(token) = tokenizer.next_token().unwrap() {
            tokens.push(token);
        }
        tokens
    }

    #[test]
    fn test_tokenize_numbers() {
        // Integers, signed values, reals and a leading-dot real.
        let expected = vec![
            Token::Integer(123),
            Token::Integer(-45),
            Token::Number(3.14),
            Token::Number(-0.5),
            Token::Number(0.5),
        ];
        assert_eq!(tokenize(b"123 -45 3.14 -0.5 .5"), expected);
    }

    #[test]
    fn test_tokenize_strings() {
        // Plain text, an escape sequence and balanced nested parentheses.
        let expected = vec![
            Token::String(b"Hello World".to_vec()),
            Token::String(b"Hello\nWorld".to_vec()),
            Token::String(b"Nested (paren)".to_vec()),
        ];
        assert_eq!(
            tokenize(b"(Hello World) (Hello\\nWorld) (Nested (paren))"),
            expected
        );
    }

    #[test]
    fn test_tokenize_hex_strings() {
        // Whitespace between hex digits must not change the decoded bytes.
        let expected = vec![
            Token::HexString(b"Hello".to_vec()),
            Token::HexString(b"Hello".to_vec()),
        ];
        assert_eq!(tokenize(b"<48656C6C6F> <48 65 6C 6C 6F>"), expected);
    }

    #[test]
    fn test_tokenize_names() {
        // `#20` decodes to a space and `#42` to the letter B.
        let expected = vec![
            Token::Name("Name".to_string()),
            Token::Name("Name with spaces".to_string()),
            Token::Name("ABC".to_string()),
        ];
        assert_eq!(tokenize(b"/Name /Name#20with#20spaces /A#42C"), expected);
    }

    #[test]
    fn test_tokenize_operators() {
        let expected: Vec<Token> = ["BT", "Tj", "ET", "q", "Q"]
            .iter()
            .map(|keyword| Token::Operator(keyword.to_string()))
            .collect();
        assert_eq!(tokenize(b"BT Tj ET q Q"), expected);
    }

    #[test]
    fn test_parse_text_operators() {
        let operators =
            ContentParser::parse(b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET").unwrap();

        let expected = vec![
            ContentOperation::BeginText,
            ContentOperation::SetFont("F1".to_string(), 12.0),
            ContentOperation::MoveText(100.0, 200.0),
            ContentOperation::ShowText(b"Hello World".to_vec()),
            ContentOperation::EndText,
        ];
        assert_eq!(operators, expected);
    }

    #[test]
    fn test_parse_graphics_operators() {
        let operators =
            ContentParser::parse(b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q").unwrap();

        let expected = vec![
            ContentOperation::SaveGraphicsState,
            ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0),
            ContentOperation::SetLineWidth(2.0),
            ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0),
            ContentOperation::Stroke,
            ContentOperation::RestoreGraphicsState,
        ];
        assert_eq!(operators, expected);
    }

    #[test]
    fn test_parse_color_operators() {
        let operators = ContentParser::parse(b"0.5 g 1 0 0 rg 0 0 0 1 k").unwrap();

        let expected = vec![
            ContentOperation::SetNonStrokingGray(0.5),
            ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0),
            ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0),
        ];
        assert_eq!(operators, expected);
    }
}
1435}