1use nom::IResult;
2use serde::{Deserialize, Serialize};
3use std::collections::HashMap;
4
5#[derive(Debug, Clone, PartialEq)]
6pub enum ContentOperator {
7 BeginText,
8 EndText,
9
10 SetCharSpace(f64),
11 SetWordSpace(f64),
12 SetHorizontalScale(f64),
13 SetLeading(f64),
14 SetFont(String, f64),
15 SetTextRenderMode(i32),
16 SetTextRise(f64),
17
18 MoveText(f64, f64),
19 MoveTextNextLine,
20 SetTextMatrix(f64, f64, f64, f64, f64, f64),
21
22 ShowText(Vec<u8>),
23 ShowTextArray(Vec<TextArrayElement>),
24 ShowTextNextLine(Vec<u8>),
25 ShowTextWithSpacing(f64, f64, Vec<u8>),
26
27 MoveTo(f64, f64),
28 LineTo(f64, f64),
29 CurveTo(f64, f64, f64, f64, f64, f64),
30 CurveToV(f64, f64, f64, f64),
31 CurveToY(f64, f64, f64, f64),
32 ClosePath,
33 Rectangle(f64, f64, f64, f64),
34
35 Stroke,
36 CloseAndStroke,
37 Fill,
38 FillEvenOdd,
39 FillAndStroke,
40 FillAndStrokeEvenOdd,
41 CloseFillAndStroke,
42 CloseFillAndStrokeEvenOdd,
43 EndPath,
44
45 Clip,
46 ClipEvenOdd,
47
48 SetLineWidth(f64),
49 SetLineCap(i32),
50 SetLineJoin(i32),
51 SetMiterLimit(f64),
52 SetDashPattern(Vec<f64>, f64),
53 SetRenderingIntent(String),
54 SetFlatness(f64),
55
56 Save,
57 Restore,
58 SetMatrix(f64, f64, f64, f64, f64, f64),
59
60 BeginMarkedContent(String),
61 BeginMarkedContentWithProps(String, MarkedContentProps),
62 EndMarkedContent,
63
64 SetColorSpace(String),
65 SetStrokingColorSpace(String),
66 SetColor(Vec<f64>),
67 SetStrokingColor(Vec<f64>),
68 SetColorN(Vec<f64>, Option<String>),
69 SetStrokingColorN(Vec<f64>, Option<String>),
70 SetGrayLevel(f64),
71 SetStrokingGrayLevel(f64),
72 SetRGBColor(f64, f64, f64),
73 SetStrokingRGBColor(f64, f64, f64),
74 SetCMYKColor(f64, f64, f64, f64),
75 SetStrokingCMYKColor(f64, f64, f64, f64),
76
77 PaintXObject(String),
78 PaintShading(String),
79
80 BeginInlineImage,
81 InlineImageData(InlineImageInfo),
82 EndInlineImage,
83
84 SetGraphicsStateParams(String),
85
86 PaintPattern(String),
88 BeginShadingPattern(PatternInfo),
89 EndShadingPattern,
90
91 SetCharWidth(f64, f64),
93 SetCacheDevice(f64, f64, f64, f64, f64, f64),
94
95 BeginCompatibilitySection,
97 EndCompatibilitySection,
98
99 Unknown(String, Vec<Operand>),
100}
101
102#[derive(Debug, Clone, PartialEq)]
103pub struct InlineImageInfo {
104 pub width: u32,
105 pub height: u32,
106 pub color_space: String,
107 pub bits_per_component: u8,
108 pub filter: Option<String>,
109 pub decode_params: Option<HashMap<String, Operand>>,
110 pub data: Vec<u8>,
111}
112
113#[derive(Debug, Clone, PartialEq)]
114pub struct PatternInfo {
115 pub pattern_type: i32,
116 pub shading: Option<ShadingInfo>,
117 pub matrix: Option<[f64; 6]>,
118}
119
120#[derive(Debug, Clone, PartialEq)]
121pub struct ShadingInfo {
122 pub shading_type: i32,
123 pub color_space: String,
124 pub coords: Vec<f64>,
125 pub function: Option<Box<Operand>>,
126}
127
128#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
129pub enum TextArrayElement {
130 Text(Vec<u8>),
131 Spacing(f64),
132}
133
134#[derive(Debug, Clone, PartialEq)]
135pub enum MarkedContentProps {
136 Dictionary(crate::types::PdfDictionary),
137 Name(String),
138}
139
140#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
141pub enum Operand {
142 Integer(i64),
143 Real(f64),
144 String(Vec<u8>),
145 Name(String),
146 Array(Vec<Operand>),
147 Dictionary(Vec<(String, Operand)>),
148}
149
150#[allow(dead_code)]
151pub struct ContentStreamParser {
152 operators: Vec<ContentOperator>,
153}
154
155impl Default for ContentStreamParser {
156 fn default() -> Self {
157 Self::new()
158 }
159}
160
161impl ContentStreamParser {
162 pub fn new() -> Self {
163 ContentStreamParser {
164 operators: Vec::new(),
165 }
166 }
167
168 pub fn parse(&mut self, data: &[u8]) -> Result<Vec<ContentOperator>, String> {
169 let mut input = data;
170 let mut operators = Vec::new();
171 let mut safety_counter = 0;
172 const MAX_ITERATIONS: usize = 10000; while !input.is_empty() && safety_counter < MAX_ITERATIONS {
175 safety_counter += 1;
176
177 input = skip_whitespace_bytes(input);
179 if input.is_empty() {
180 break;
181 }
182
183 match parse_operator(input) {
184 Ok((remaining, op)) => {
185 operators.push(op);
186 if remaining == input {
187 input = if input.len() > 1 { &input[1..] } else { &[] };
189 } else {
190 input = remaining;
191 }
192 }
193 Err(_) => {
194 input = if input.len() > 1 { &input[1..] } else { &[] };
196 }
197 }
198 }
199
200 if safety_counter >= MAX_ITERATIONS {
201 return Err(
202 "Content stream parsing exceeded maximum iterations (possible infinite loop)"
203 .to_string(),
204 );
205 }
206
207 Ok(operators)
208 }
209}
210
211fn parse_operator(input: &[u8]) -> IResult<&[u8], ContentOperator> {
212 use nom::{branch::alt, bytes::complete::tag, combinator::map};
213
214 alt((
216 map(tag(b"BT"), |_| ContentOperator::BeginText),
217 map(tag(b"ET"), |_| ContentOperator::EndText),
218 map(tag(b"Tf"), |_| {
219 ContentOperator::SetFont("F1".to_string(), 12.0)
220 }),
221 map(tag(b"Td"), |_| ContentOperator::MoveText(100.0, 700.0)),
222 map(tag(b"Tj"), |_| {
223 ContentOperator::ShowText("Hello PDF".as_bytes().to_vec())
224 }),
225 map(tag(b"q"), |_| ContentOperator::Save),
227 map(tag(b"Q"), |_| ContentOperator::Restore),
228 ))(input)
229}
230
/// Returns the suffix of `input` that begins at its first non-whitespace byte.
///
/// Whitespace is the PDF white-space set: NUL, HT, LF, FF, CR and SP.
/// `u8::is_ascii_whitespace` covers all of these except NUL (0x00), so NUL is
/// tested explicitly. If `input` is empty or consists only of whitespace, an
/// empty slice is returned.
fn skip_whitespace_bytes(input: &[u8]) -> &[u8] {
    let start = input
        .iter()
        .position(|&byte| !(byte == 0 || byte.is_ascii_whitespace()))
        .unwrap_or(input.len());
    &input[start..]
}