1use crate::types::{PdfDictionary, PdfName, PdfValue};
2use serde::{Deserialize, Serialize};
3use std::fmt;
4
/// A PDF stream object: a dictionary paired with a payload.
#[derive(Debug, Clone, PartialEq)]
pub struct PdfStream {
    /// Stream dictionary. Within this file it is consulted for the `Filter`
    /// and `DecodeParms` entries.
    pub dict: PdfDictionary,
    /// Payload: buffered bytes (`Raw` / `Decoded`) or a `StreamReference`
    /// (`Lazy`).
    pub data: StreamData,
}
10
/// Payload carried by a [`PdfStream`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum StreamData {
    /// Buffered bytes as handed to `PdfStream::new`; exposed through
    /// `PdfStream::raw_data`. Presumably still filter-encoded — TODO confirm
    /// against the parser.
    Raw(Vec<u8>),
    /// Buffered bytes exposed through `PdfStream::decoded_data`. Presumably
    /// the output of the filter chain — TODO confirm against the producer.
    Decoded(Vec<u8>),
    /// No bytes are buffered; only a [`StreamReference`] (offset, length,
    /// filters) is stored.
    Lazy(StreamReference),
}
17
18impl StreamData {
19 pub fn len(&self) -> usize {
20 match self {
21 StreamData::Raw(data) | StreamData::Decoded(data) => data.len(),
22 StreamData::Lazy(reference) => reference.length,
23 }
24 }
25
26 pub fn is_empty(&self) -> bool {
27 self.len() == 0
28 }
29
30 pub fn hash(&self) -> String {
31 use std::collections::hash_map::DefaultHasher;
32 use std::hash::{Hash, Hasher};
33
34 let mut hasher = DefaultHasher::new();
35 match self {
36 StreamData::Raw(data) | StreamData::Decoded(data) => {
37 data.hash(&mut hasher);
38 }
39 StreamData::Lazy(reference) => {
40 reference.offset.hash(&mut hasher);
41 reference.length.hash(&mut hasher);
42 }
43 }
44 format!("{:x}", hasher.finish())
45 }
46
47 pub fn truncate(&mut self, len: usize) {
48 match self {
49 StreamData::Raw(data) | StreamData::Decoded(data) => {
50 data.truncate(len);
51 }
52 StreamData::Lazy(_) => {
53 }
55 }
56 }
57
58 pub fn as_bytes(&self) -> Option<&[u8]> {
59 match self {
60 StreamData::Raw(data) | StreamData::Decoded(data) => Some(data),
61 StreamData::Lazy(_) => None,
62 }
63 }
64}
65
66impl std::ops::Index<usize> for StreamData {
67 type Output = u8;
68
69 fn index(&self, index: usize) -> &Self::Output {
70 match self {
71 StreamData::Raw(data) | StreamData::Decoded(data) => &data[index],
72 StreamData::Lazy(_) => panic!("Cannot index into lazy stream data"),
73 }
74 }
75}
76
/// Describes stream bytes that are not buffered in a [`StreamData`] value.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct StreamReference {
    /// Offset of the bytes — presumably measured from the start of the source
    /// file; TODO confirm the origin with the code that fills this in.
    pub offset: u64,
    /// Number of bytes; this is what `StreamData::len` reports for `Lazy`.
    pub length: usize,
    /// Filters recorded with the reference. Not read by any code visible in
    /// this part of the file.
    pub filters: Vec<StreamFilter>,
}
83
/// A stream filter recognised by this module, together with its decode
/// parameters where the filter takes any.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum StreamFilter {
    /// `ASCIIHexDecode` (also parsed from the abbreviation `AHx`).
    ASCIIHexDecode,
    /// `ASCII85Decode` (also parsed from the abbreviation `A85`).
    ASCII85Decode,
    /// `LZWDecode` (also parsed from the abbreviation `LZW`).
    LZWDecode(LZWDecodeParams),
    /// `FlateDecode` (also parsed from the abbreviation `Fl`).
    FlateDecode(FlateDecodeParams),
    /// `RunLengthDecode` (also parsed from the abbreviation `RL`).
    RunLengthDecode,
    /// `CCITTFaxDecode` (also parsed from the abbreviation `CCF`).
    CCITTFaxDecode(CCITTFaxDecodeParams),
    /// `JBIG2Decode`.
    JBIG2Decode,
    /// `DCTDecode` (also parsed from the abbreviation `DCT`).
    DCTDecode,
    /// `JPXDecode`.
    JPXDecode,
    /// `Crypt`, carrying the name of the crypt filter to use.
    Crypt(CryptFilter),
}
97
/// Decode parameters attached to [`StreamFilter::LZWDecode`].
///
/// Every field is `None` when the corresponding dictionary entry is absent or
/// has an unusable type.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct LZWDecodeParams {
    /// Value of the `Predictor` entry.
    pub predictor: Option<i32>,
    /// Value of the `Colors` entry.
    pub colors: Option<i32>,
    /// Value of the `BitsPerComponent` entry.
    pub bits_per_component: Option<i32>,
    /// Value of the `Columns` entry.
    pub columns: Option<i32>,
    /// Value of the `EarlyChange` entry, coerced to a boolean by the parser
    /// in this file (booleans as-is, numbers as "non-zero").
    pub early_change: Option<bool>,
}
106
/// Decode parameters attached to [`StreamFilter::FlateDecode`].
///
/// Every field is `None` when the corresponding dictionary entry is absent or
/// is not an integer.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct FlateDecodeParams {
    /// Value of the `Predictor` entry.
    pub predictor: Option<i32>,
    /// Value of the `Colors` entry.
    pub colors: Option<i32>,
    /// Value of the `BitsPerComponent` entry.
    pub bits_per_component: Option<i32>,
    /// Value of the `Columns` entry.
    pub columns: Option<i32>,
}
114
/// Decode parameters attached to [`StreamFilter::CCITTFaxDecode`].
///
/// Every field is `None` when the corresponding dictionary entry is absent or
/// has an unusable type. Boolean fields are coerced by the parser in this file
/// (booleans as-is, numbers as "non-zero").
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct CCITTFaxDecodeParams {
    /// Value of the `K` entry.
    pub k: Option<i32>,
    /// Value of the `EndOfLine` entry.
    pub end_of_line: Option<bool>,
    /// Value of the `EncodedByteAlign` entry.
    pub encoded_byte_align: Option<bool>,
    /// Value of the `Columns` entry.
    pub columns: Option<i32>,
    /// Value of the `Rows` entry.
    pub rows: Option<i32>,
    /// Value of the `EndOfBlock` entry.
    pub end_of_block: Option<bool>,
    /// Value of the `BlackIs1` entry.
    pub black_is_1: Option<bool>,
    /// Value of the `DamagedRowsBeforeError` entry.
    pub damaged_rows_before_error: Option<i32>,
}
126
/// Payload of [`StreamFilter::Crypt`].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CryptFilter {
    /// Crypt filter name, taken from the `Name` entry of the filter's decode
    /// parameters; the parser in this file falls back to `Identity` when the
    /// entry is missing or is not a name.
    pub name: PdfName,
}
131
/// Crypt filter parameter variants.
///
/// NOTE(review): nothing visible in this part of the file constructs or reads
/// this type. The variant names look like the PDF crypt filter methods
/// (`Identity`, `V2`, `AESV2`, `AESV3`) — confirm against the encryption code
/// before relying on that reading.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum CryptFilterParams {
    /// No named filter attached.
    Identity,
    /// `V2` variant carrying a filter name.
    V2 { name: String },
    /// `AESV2` variant carrying a filter name.
    AESV2 { name: String },
    /// `AESV3` variant carrying a filter name.
    AESV3 { name: String },
}
143
144impl PdfStream {
145 pub fn new(dict: PdfDictionary, data: Vec<u8>) -> Self {
146 PdfStream {
147 dict,
148 data: StreamData::Raw(data),
149 }
150 }
151
152 pub fn new_lazy(dict: PdfDictionary, reference: StreamReference) -> Self {
153 PdfStream {
154 dict,
155 data: StreamData::Lazy(reference),
156 }
157 }
158
159 pub fn raw_data(&self) -> Option<&[u8]> {
160 match &self.data {
161 StreamData::Raw(data) => Some(data),
162 _ => None,
163 }
164 }
165
166 pub fn decode(&self) -> Result<Vec<u8>, String> {
167 match &self.data {
168 StreamData::Raw(data) | StreamData::Decoded(data) => {
169 let filters = self.get_filters_with_params();
170 if filters.is_empty() {
171 Ok(data.clone())
172 } else {
173 crate::filters::decode_stream(data, &filters).map_err(|e| e.to_string())
174 }
175 }
176 StreamData::Lazy(_) => Err("Lazy stream decoding not implemented".to_string()),
177 }
178 }
179
180 pub fn decode_with_limits(
181 &self,
182 max_output_bytes: usize,
183 max_ratio: usize,
184 ) -> Result<Vec<u8>, String> {
185 match &self.data {
186 StreamData::Raw(data) | StreamData::Decoded(data) => {
187 let filters = self.get_filters_with_params();
188 if filters.is_empty() {
189 Ok(data.clone())
190 } else {
191 crate::filters::decode_stream_with_limits(
192 data,
193 &filters,
194 max_output_bytes,
195 max_ratio,
196 )
197 .map_err(|e| e.to_string())
198 }
199 }
200 StreamData::Lazy(_) => Err("Lazy stream decoding not implemented".to_string()),
201 }
202 }
203
204 pub fn decoded_data(&self) -> Option<&[u8]> {
205 match &self.data {
206 StreamData::Decoded(data) => Some(data),
207 _ => None,
208 }
209 }
210
211 pub fn is_lazy(&self) -> bool {
212 matches!(self.data, StreamData::Lazy(_))
213 }
214
215 pub fn length(&self) -> Option<usize> {
216 match &self.data {
217 StreamData::Raw(data) | StreamData::Decoded(data) => Some(data.len()),
218 StreamData::Lazy(reference) => Some(reference.length),
219 }
220 }
221
222 pub fn get_filters(&self) -> Vec<StreamFilter> {
223 self.get_filters_with_params()
224 }
225
226 pub fn get_filters_with_params(&self) -> Vec<StreamFilter> {
227 let mut filters = Vec::new();
228
229 let filter_names: Vec<&PdfName> = match self.dict.get("Filter") {
230 Some(PdfValue::Name(name)) => vec![name],
231 Some(PdfValue::Array(array)) => array.iter().filter_map(|v| v.as_name()).collect(),
232 _ => Vec::new(),
233 };
234
235 if filter_names.is_empty() {
236 return filters;
237 }
238
239 let mut decode_params = match self.dict.get("DecodeParms") {
240 Some(PdfValue::Dictionary(dict)) => vec![Some(dict)],
241 Some(PdfValue::Array(array)) => array.iter().map(|v| v.as_dict()).collect(),
242 Some(PdfValue::Null) => vec![None],
243 _ => Vec::new(),
244 };
245
246 if decode_params.len() < filter_names.len() {
247 decode_params.resize(filter_names.len(), None);
248 }
249
250 for (i, name) in filter_names.iter().enumerate() {
251 let params = decode_params.get(i).copied().unwrap_or(None);
252 if let Some(filter) = Self::filter_from_name_with_params(name, params) {
253 filters.push(filter);
254 }
255 }
256
257 filters
258 }
259
260 fn filter_from_name_with_params(
261 name: &PdfName,
262 params: Option<&PdfDictionary>,
263 ) -> Option<StreamFilter> {
264 match name.without_slash() {
265 "ASCIIHexDecode" | "AHx" => Some(StreamFilter::ASCIIHexDecode),
266 "ASCII85Decode" | "A85" => Some(StreamFilter::ASCII85Decode),
267 "LZWDecode" | "LZW" => {
268 let mut parsed = LZWDecodeParams::default();
269 if let Some(params) = params {
270 parsed = parse_lzw_params(params);
271 }
272 Some(StreamFilter::LZWDecode(parsed))
273 }
274 "FlateDecode" | "Fl" => {
275 let mut parsed = FlateDecodeParams::default();
276 if let Some(params) = params {
277 parsed = parse_flate_params(params);
278 }
279 Some(StreamFilter::FlateDecode(parsed))
280 }
281 "RunLengthDecode" | "RL" => Some(StreamFilter::RunLengthDecode),
282 "CCITTFaxDecode" | "CCF" => {
283 let mut parsed = CCITTFaxDecodeParams::default();
284 if let Some(params) = params {
285 parsed = parse_ccitt_params(params);
286 }
287 Some(StreamFilter::CCITTFaxDecode(parsed))
288 }
289 "JBIG2Decode" => Some(StreamFilter::JBIG2Decode),
290 "DCTDecode" | "DCT" => Some(StreamFilter::DCTDecode),
291 "JPXDecode" => Some(StreamFilter::JPXDecode),
292 "Crypt" => {
293 let crypt_name = params
294 .and_then(|p| p.get("Name"))
295 .and_then(|v| v.as_name())
296 .cloned()
297 .unwrap_or_else(|| PdfName::new("Identity"));
298 Some(StreamFilter::Crypt(CryptFilter { name: crypt_name }))
299 }
300 _ => None,
301 }
302 }
303}
304
305fn parse_flate_params(params: &PdfDictionary) -> FlateDecodeParams {
306 FlateDecodeParams {
307 predictor: params
308 .get("Predictor")
309 .and_then(|v| v.as_integer())
310 .map(|v| v as i32),
311 colors: params
312 .get("Colors")
313 .and_then(|v| v.as_integer())
314 .map(|v| v as i32),
315 bits_per_component: params
316 .get("BitsPerComponent")
317 .and_then(|v| v.as_integer())
318 .map(|v| v as i32),
319 columns: params
320 .get("Columns")
321 .and_then(|v| v.as_integer())
322 .map(|v| v as i32),
323 }
324}
325
326fn parse_lzw_params(params: &PdfDictionary) -> LZWDecodeParams {
327 LZWDecodeParams {
328 predictor: params
329 .get("Predictor")
330 .and_then(|v| v.as_integer())
331 .map(|v| v as i32),
332 colors: params
333 .get("Colors")
334 .and_then(|v| v.as_integer())
335 .map(|v| v as i32),
336 bits_per_component: params
337 .get("BitsPerComponent")
338 .and_then(|v| v.as_integer())
339 .map(|v| v as i32),
340 columns: params
341 .get("Columns")
342 .and_then(|v| v.as_integer())
343 .map(|v| v as i32),
344 early_change: params.get("EarlyChange").and_then(bool_from_value),
345 }
346}
347
348fn parse_ccitt_params(params: &PdfDictionary) -> CCITTFaxDecodeParams {
349 CCITTFaxDecodeParams {
350 k: params
351 .get("K")
352 .and_then(|v| v.as_integer())
353 .map(|v| v as i32),
354 end_of_line: params.get("EndOfLine").and_then(bool_from_value),
355 encoded_byte_align: params.get("EncodedByteAlign").and_then(bool_from_value),
356 columns: params
357 .get("Columns")
358 .and_then(|v| v.as_integer())
359 .map(|v| v as i32),
360 rows: params
361 .get("Rows")
362 .and_then(|v| v.as_integer())
363 .map(|v| v as i32),
364 end_of_block: params.get("EndOfBlock").and_then(bool_from_value),
365 black_is_1: params.get("BlackIs1").and_then(bool_from_value),
366 damaged_rows_before_error: params
367 .get("DamagedRowsBeforeError")
368 .and_then(|v| v.as_integer())
369 .map(|v| v as i32),
370 }
371}
372
373fn bool_from_value(value: &PdfValue) -> Option<bool> {
374 match value {
375 PdfValue::Boolean(b) => Some(*b),
376 PdfValue::Integer(i) => Some(*i != 0),
377 PdfValue::Real(r) => Some(*r != 0.0),
378 _ => None,
379 }
380}
381
382impl StreamFilter {
383 pub fn from_name(name: &PdfName) -> Option<Self> {
384 match name.without_slash() {
385 "ASCIIHexDecode" | "AHx" => Some(StreamFilter::ASCIIHexDecode),
386 "ASCII85Decode" | "A85" => Some(StreamFilter::ASCII85Decode),
387 "LZWDecode" | "LZW" => Some(StreamFilter::LZWDecode(LZWDecodeParams::default())),
388 "FlateDecode" | "Fl" => Some(StreamFilter::FlateDecode(FlateDecodeParams::default())),
389 "RunLengthDecode" | "RL" => Some(StreamFilter::RunLengthDecode),
390 "CCITTFaxDecode" | "CCF" => {
391 Some(StreamFilter::CCITTFaxDecode(CCITTFaxDecodeParams::default()))
392 }
393 "JBIG2Decode" => Some(StreamFilter::JBIG2Decode),
394 "DCTDecode" | "DCT" => Some(StreamFilter::DCTDecode),
395 "JPXDecode" => Some(StreamFilter::JPXDecode),
396 _ => None,
397 }
398 }
399
400 pub fn name(&self) -> &'static str {
401 match self {
402 StreamFilter::ASCIIHexDecode => "ASCIIHexDecode",
403 StreamFilter::ASCII85Decode => "ASCII85Decode",
404 StreamFilter::LZWDecode(_) => "LZWDecode",
405 StreamFilter::FlateDecode(_) => "FlateDecode",
406 StreamFilter::RunLengthDecode => "RunLengthDecode",
407 StreamFilter::CCITTFaxDecode(_) => "CCITTFaxDecode",
408 StreamFilter::JBIG2Decode => "JBIG2Decode",
409 StreamFilter::DCTDecode => "DCTDecode",
410 StreamFilter::JPXDecode => "JPXDecode",
411 StreamFilter::Crypt(_) => "Crypt",
412 }
413 }
414}
415
416impl fmt::Display for PdfStream {
417 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
418 write!(f, "{} stream[", self.dict)?;
419 match &self.data {
420 StreamData::Raw(data) => write!(f, "{} bytes raw", data.len())?,
421 StreamData::Decoded(data) => write!(f, "{} bytes decoded", data.len())?,
422 StreamData::Lazy(reference) => write!(f, "{} bytes lazy", reference.length)?,
423 }
424 write!(f, "]endstream")
425 }
426}