1use crate::parser::content_stream::{ContentOperator, InlineImageInfo, TextArrayElement};
2use nom::{
3 branch::alt,
4 bytes::complete::{tag, take_while, take_while1},
5 character::complete::{char, digit1, multispace0, multispace1, one_of},
6 combinator::{map, opt, recognize},
7 multi::separated_list0,
8 sequence::{delimited, preceded, tuple},
9 IResult,
10};
11use std::collections::HashMap;
12
/// A value that can appear as an operand in a content stream.
#[derive(Debug, Clone, PartialEq)]
pub enum Operand {
    /// Integer numeric value.
    Integer(i64),
    /// Real (floating-point) numeric value.
    Real(f64),
    /// String contents as raw bytes.
    String(Vec<u8>),
    /// Name text.
    Name(String),
    /// Ordered list of nested operands.
    Array(Vec<Operand>),
    /// Mapping from key to nested operand.
    Dictionary(HashMap<String, Operand>),
    /// Boolean value.
    Boolean(bool),
    /// The null value.
    Null,
}

impl Operand {
    /// Returns the numeric value of an `Integer` or `Real` operand as `f64`,
    /// or `None` for every other variant.
    pub fn as_number(&self) -> Option<f64> {
        if let Operand::Integer(whole) = self {
            return Some(*whole as f64);
        }
        if let Operand::Real(value) = self {
            return Some(*value);
        }
        None
    }

    /// Borrows the bytes of a `String` operand, or returns `None` for every
    /// other variant.
    pub fn as_string(&self) -> Option<&[u8]> {
        if let Operand::String(bytes) = self {
            Some(bytes.as_slice())
        } else {
            None
        }
    }

    /// Borrows the text of a `Name` operand, or returns `None` for every
    /// other variant.
    pub fn as_name(&self) -> Option<&str> {
        if let Operand::Name(text) = self {
            Some(text.as_str())
        } else {
            None
        }
    }
}
48
49pub fn parse_content_stream(input: &[u8]) -> Vec<ContentOperator> {
51 let mut operators = Vec::new();
52 let mut operand_stack: Vec<Operand> = Vec::new();
53 let mut remaining = input;
54
55 while !remaining.is_empty() {
56 if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
58 remaining = rest;
59 }
60
61 if remaining.is_empty() {
62 break;
63 }
64
65 if let Ok((rest, operand)) = parse_operand(remaining) {
67 operand_stack.push(operand);
68 remaining = rest;
69 }
70 else if let Ok((rest, op)) = parse_operator_with_operands(remaining, &mut operand_stack) {
72 operators.push(op);
73 remaining = rest;
74 }
75 else {
77 remaining = &remaining[1..];
78 }
79 }
80
81 operators
82}
83
/// A parsed content-stream operator together with the position it was found at.
#[derive(Debug, Clone)]
pub struct ContentOperatorWithOffset {
    /// The parsed operator.
    pub operator: ContentOperator,
    /// Byte offset, from the start of the parsed input, of the operator
    /// keyword itself (its operands precede this position).
    pub offset: usize,
}
89
90pub fn parse_content_stream_with_offsets(input: &[u8]) -> Vec<ContentOperatorWithOffset> {
92 let mut operators = Vec::new();
93 let mut operand_stack: Vec<Operand> = Vec::new();
94 let mut remaining = input;
95 let base_len = input.len();
96
97 while !remaining.is_empty() {
98 if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
99 remaining = rest;
100 }
101 if remaining.is_empty() {
102 break;
103 }
104
105 if let Ok((rest, operand)) = parse_operand(remaining) {
106 operand_stack.push(operand);
107 remaining = rest;
108 } else if let Ok((rest, op)) = parse_operator_with_operands(remaining, &mut operand_stack) {
109 let offset = base_len.saturating_sub(remaining.len());
110 operators.push(ContentOperatorWithOffset {
111 operator: op,
112 offset,
113 });
114 remaining = rest;
115 } else {
116 remaining = &remaining[1..];
117 }
118 }
119
120 operators
121}
122
123fn parse_operand(input: &[u8]) -> IResult<&[u8], Operand> {
125 alt((
126 map(parse_number, |n| match n {
127 Number::Integer(i) => Operand::Integer(i),
128 Number::Real(r) => Operand::Real(r),
129 }),
130 map(parse_string, Operand::String),
131 map(parse_hex_string, Operand::String),
132 map(parse_name, Operand::Name),
133 map(parse_array, Operand::Array),
134 map(parse_dictionary, Operand::Dictionary),
135 map(tag(b"true"), |_| Operand::Boolean(true)),
136 map(tag(b"false"), |_| Operand::Boolean(false)),
137 map(tag(b"null"), |_| Operand::Null),
138 ))(input)
139}
140
/// Numeric literal as produced by `parse_number`, before it is turned into
/// an `Operand`.
#[derive(Debug)]
enum Number {
    /// Literal written without a decimal point.
    Integer(i64),
    /// Literal written with a decimal point.
    Real(f64),
}
146
147fn parse_number(input: &[u8]) -> IResult<&[u8], Number> {
148 let (input, sign) = opt(one_of("+-"))(input)?;
149 let (input, num_str) = recognize(tuple((digit1, opt(tuple((char('.'), digit1))))))(input)?;
150
151 let num_string = std::str::from_utf8(num_str).map_err(|_| {
152 nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
153 })?;
154
155 let sign_mult = if sign == Some('-') { -1.0 } else { 1.0 };
156
157 if num_string.contains('.') {
158 let value: f64 = num_string.parse().map_err(|_| {
159 nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
160 })?;
161 Ok((input, Number::Real(value * sign_mult)))
162 } else {
163 let value: i64 = num_string.parse().map_err(|_| {
164 nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
165 })?;
166 Ok((input, Number::Integer((value as f64 * sign_mult) as i64)))
167 }
168}
169
170fn parse_string(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
171 let (input, _) = char('(')(input)?;
172 let mut result = Vec::new();
173 let mut remaining = input;
174 let mut paren_depth = 1;
175
176 while paren_depth > 0 && !remaining.is_empty() {
177 match remaining[0] {
178 b'(' => {
179 paren_depth += 1;
180 result.push(b'(');
181 remaining = &remaining[1..];
182 }
183 b')' => {
184 paren_depth -= 1;
185 if paren_depth > 0 {
186 result.push(b')');
187 }
188 remaining = &remaining[1..];
189 }
190 b'\\' if remaining.len() > 1 => {
191 match remaining[1] {
193 b'n' => result.push(b'\n'),
194 b'r' => result.push(b'\r'),
195 b't' => result.push(b'\t'),
196 b'b' => result.push(b'\x08'),
197 b'f' => result.push(b'\x0C'),
198 b'(' => result.push(b'('),
199 b')' => result.push(b')'),
200 b'\\' => result.push(b'\\'),
201 c if c.is_ascii_digit() => {
202 let mut octal = vec![c];
204 let mut idx = 2;
205 while idx < remaining.len() && idx < 4 && remaining[idx].is_ascii_digit() {
206 octal.push(remaining[idx]);
207 idx += 1;
208 }
209 if let Ok(s) = std::str::from_utf8(&octal) {
210 if let Ok(n) = u8::from_str_radix(s, 8) {
211 result.push(n);
212 }
213 }
214 remaining = &remaining[idx..];
215 continue;
216 }
217 _ => {
218 result.push(remaining[1]);
219 }
220 }
221 remaining = &remaining[2..];
222 }
223 c => {
224 result.push(c);
225 remaining = &remaining[1..];
226 }
227 }
228 }
229
230 Ok((remaining, result))
231}
232
233fn parse_hex_string(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
234 let (input, _) = char('<')(input)?;
235 let (input, hex) = take_while(|c: u8| c.is_ascii_hexdigit() || c.is_ascii_whitespace())(input)?;
236 let (input, _) = char('>')(input)?;
237
238 let hex_clean: Vec<u8> = hex
239 .iter()
240 .filter(|c| c.is_ascii_hexdigit())
241 .copied()
242 .collect();
243
244 let mut result = Vec::new();
245 for chunk in hex_clean.chunks(2) {
246 let high = chunk[0];
247 let low = if chunk.len() > 1 { chunk[1] } else { b'0' };
248
249 let h = if high.is_ascii_digit() {
250 high - b'0'
251 } else {
252 (high.to_ascii_uppercase() - b'A') + 10
253 };
254 let l = if low.is_ascii_digit() {
255 low - b'0'
256 } else {
257 (low.to_ascii_uppercase() - b'A') + 10
258 };
259
260 result.push((h << 4) | l);
261 }
262
263 Ok((input, result))
264}
265
266fn parse_name(input: &[u8]) -> IResult<&[u8], String> {
267 let (input, _) = char('/')(input)?;
268 let (input, name) = take_while(|c: u8| {
269 !c.is_ascii_whitespace()
270 && c != b'/'
271 && c != b'['
272 && c != b']'
273 && c != b'('
274 && c != b')'
275 && c != b'<'
276 && c != b'>'
277 })(input)?;
278
279 let mut result = String::new();
281 let mut i = 0;
282 let name_bytes = name;
283
284 while i < name_bytes.len() {
285 if name_bytes[i] == b'#' && i + 2 < name_bytes.len() {
286 if let Ok(hex) = std::str::from_utf8(&name_bytes[i + 1..i + 3]) {
287 if let Ok(byte) = u8::from_str_radix(hex, 16) {
288 result.push(byte as char);
289 i += 3;
290 continue;
291 }
292 }
293 }
294 result.push(name_bytes[i] as char);
295 i += 1;
296 }
297
298 Ok((input, result))
299}
300
301fn parse_array(input: &[u8]) -> IResult<&[u8], Vec<Operand>> {
302 delimited(
303 preceded(char('['), multispace0),
304 separated_list0(multispace1, parse_operand),
305 preceded(multispace0, char(']')),
306 )(input)
307}
308
309fn parse_dictionary(input: &[u8]) -> IResult<&[u8], HashMap<String, Operand>> {
310 let (input, _) = preceded(tag(b"<<"), multispace0)(input)?;
311 let mut dict = HashMap::new();
312 let mut remaining = input;
313
314 loop {
315 if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
317 remaining = rest;
318 }
319
320 if let Ok((rest, _)) = tag::<_, _, nom::error::Error<_>>(b">>")(remaining) {
322 return Ok((rest, dict));
323 }
324
325 if let Ok((rest, name)) = parse_name(remaining) {
327 remaining = rest;
328
329 if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
331 remaining = rest;
332 }
333
334 if let Ok((rest, value)) = parse_operand(remaining) {
336 dict.insert(name, value);
337 remaining = rest;
338 } else {
339 break;
340 }
341 } else {
342 break;
343 }
344 }
345
346 Err(nom::Err::Error(nom::error::Error::new(
347 input,
348 nom::error::ErrorKind::Tag,
349 )))
350}
351
352fn parse_operator_with_operands<'a>(
354 input: &'a [u8],
355 operand_stack: &mut Vec<Operand>,
356) -> IResult<&'a [u8], ContentOperator> {
357 let (input, op_name) =
358 take_while1(|c: u8| c.is_ascii_alphabetic() || c == b'*' || c == b'\'' || c == b'"')(
359 input,
360 )?;
361
362 let operator = match op_name {
363 b"BT" => {
365 operand_stack.clear();
366 ContentOperator::BeginText
367 }
368 b"ET" => {
369 operand_stack.clear();
370 ContentOperator::EndText
371 }
372 b"Tc" => {
373 let spacing = pop_number(operand_stack).unwrap_or(0.0);
374 ContentOperator::SetCharSpace(spacing)
375 }
376 b"Tw" => {
377 let spacing = pop_number(operand_stack).unwrap_or(0.0);
378 ContentOperator::SetWordSpace(spacing)
379 }
380 b"Tz" => {
381 let scale = pop_number(operand_stack).unwrap_or(100.0);
382 ContentOperator::SetHorizontalScale(scale)
383 }
384 b"TL" => {
385 let leading = pop_number(operand_stack).unwrap_or(0.0);
386 ContentOperator::SetLeading(leading)
387 }
388 b"Tf" => {
389 let size = pop_number(operand_stack).unwrap_or(12.0);
390 let font = pop_name(operand_stack).unwrap_or_default();
391 ContentOperator::SetFont(font, size)
392 }
393 b"Tr" => {
394 let mode = pop_number(operand_stack).unwrap_or(0.0) as i32;
395 ContentOperator::SetTextRenderMode(mode)
396 }
397 b"Ts" => {
398 let rise = pop_number(operand_stack).unwrap_or(0.0);
399 ContentOperator::SetTextRise(rise)
400 }
401 b"Td" => {
402 let ty = pop_number(operand_stack).unwrap_or(0.0);
403 let tx = pop_number(operand_stack).unwrap_or(0.0);
404 ContentOperator::MoveText(tx, ty)
405 }
406 b"TD" => {
407 let ty = pop_number(operand_stack).unwrap_or(0.0);
408 let tx = pop_number(operand_stack).unwrap_or(0.0);
409 ContentOperator::MoveText(tx, ty)
410 }
411 b"Tm" => {
412 let f = pop_number(operand_stack).unwrap_or(0.0);
413 let e = pop_number(operand_stack).unwrap_or(0.0);
414 let d = pop_number(operand_stack).unwrap_or(1.0);
415 let c = pop_number(operand_stack).unwrap_or(0.0);
416 let b = pop_number(operand_stack).unwrap_or(0.0);
417 let a = pop_number(operand_stack).unwrap_or(1.0);
418 ContentOperator::SetTextMatrix(a, b, c, d, e, f)
419 }
420 b"T*" => {
421 operand_stack.clear();
422 ContentOperator::MoveTextNextLine
423 }
424 b"Tj" => {
425 let text = pop_string(operand_stack).unwrap_or_default();
426 ContentOperator::ShowText(text)
427 }
428 b"TJ" => {
429 let array = pop_text_array(operand_stack);
430 ContentOperator::ShowTextArray(array)
431 }
432 b"'" => {
433 let text = pop_string(operand_stack).unwrap_or_default();
434 ContentOperator::ShowTextNextLine(text)
435 }
436 b"\"" => {
437 let text = pop_string(operand_stack).unwrap_or_default();
438 let tc = pop_number(operand_stack).unwrap_or(0.0);
439 let tw = pop_number(operand_stack).unwrap_or(0.0);
440 ContentOperator::ShowTextWithSpacing(tw, tc, text)
441 }
442
443 b"m" => {
445 let y = pop_number(operand_stack).unwrap_or(0.0);
446 let x = pop_number(operand_stack).unwrap_or(0.0);
447 ContentOperator::MoveTo(x, y)
448 }
449 b"l" => {
450 let y = pop_number(operand_stack).unwrap_or(0.0);
451 let x = pop_number(operand_stack).unwrap_or(0.0);
452 ContentOperator::LineTo(x, y)
453 }
454 b"c" => {
455 let y3 = pop_number(operand_stack).unwrap_or(0.0);
456 let x3 = pop_number(operand_stack).unwrap_or(0.0);
457 let y2 = pop_number(operand_stack).unwrap_or(0.0);
458 let x2 = pop_number(operand_stack).unwrap_or(0.0);
459 let y1 = pop_number(operand_stack).unwrap_or(0.0);
460 let x1 = pop_number(operand_stack).unwrap_or(0.0);
461 ContentOperator::CurveTo(x1, y1, x2, y2, x3, y3)
462 }
463 b"v" => {
464 let y3 = pop_number(operand_stack).unwrap_or(0.0);
465 let x3 = pop_number(operand_stack).unwrap_or(0.0);
466 let y2 = pop_number(operand_stack).unwrap_or(0.0);
467 let x2 = pop_number(operand_stack).unwrap_or(0.0);
468 ContentOperator::CurveToV(x2, y2, x3, y3)
469 }
470 b"y" => {
471 let y3 = pop_number(operand_stack).unwrap_or(0.0);
472 let x3 = pop_number(operand_stack).unwrap_or(0.0);
473 let y1 = pop_number(operand_stack).unwrap_or(0.0);
474 let x1 = pop_number(operand_stack).unwrap_or(0.0);
475 ContentOperator::CurveToY(x1, y1, x3, y3)
476 }
477 b"h" => {
478 operand_stack.clear();
479 ContentOperator::ClosePath
480 }
481 b"re" => {
482 let h = pop_number(operand_stack).unwrap_or(0.0);
483 let w = pop_number(operand_stack).unwrap_or(0.0);
484 let y = pop_number(operand_stack).unwrap_or(0.0);
485 let x = pop_number(operand_stack).unwrap_or(0.0);
486 ContentOperator::Rectangle(x, y, w, h)
487 }
488
489 b"S" => {
491 operand_stack.clear();
492 ContentOperator::Stroke
493 }
494 b"s" => {
495 operand_stack.clear();
496 ContentOperator::CloseAndStroke
497 }
498 b"f" | b"F" => {
499 operand_stack.clear();
500 ContentOperator::Fill
501 }
502 b"f*" => {
503 operand_stack.clear();
504 ContentOperator::FillEvenOdd
505 }
506 b"B" => {
507 operand_stack.clear();
508 ContentOperator::FillAndStroke
509 }
510 b"B*" => {
511 operand_stack.clear();
512 ContentOperator::FillAndStrokeEvenOdd
513 }
514 b"b" => {
515 operand_stack.clear();
516 ContentOperator::CloseFillAndStroke
517 }
518 b"b*" => {
519 operand_stack.clear();
520 ContentOperator::CloseFillAndStrokeEvenOdd
521 }
522 b"n" => {
523 operand_stack.clear();
524 ContentOperator::EndPath
525 }
526
527 b"W" => {
529 operand_stack.clear();
530 ContentOperator::Clip
531 }
532 b"W*" => {
533 operand_stack.clear();
534 ContentOperator::ClipEvenOdd
535 }
536
537 b"q" => {
539 operand_stack.clear();
540 ContentOperator::Save
541 }
542 b"Q" => {
543 operand_stack.clear();
544 ContentOperator::Restore
545 }
546 b"cm" => {
547 let f = pop_number(operand_stack).unwrap_or(0.0);
548 let e = pop_number(operand_stack).unwrap_or(0.0);
549 let d = pop_number(operand_stack).unwrap_or(1.0);
550 let c = pop_number(operand_stack).unwrap_or(0.0);
551 let b = pop_number(operand_stack).unwrap_or(0.0);
552 let a = pop_number(operand_stack).unwrap_or(1.0);
553 ContentOperator::SetMatrix(a, b, c, d, e, f)
554 }
555 b"w" => {
556 let width = pop_number(operand_stack).unwrap_or(1.0);
557 ContentOperator::SetLineWidth(width)
558 }
559 b"J" => {
560 let cap = pop_number(operand_stack).unwrap_or(0.0) as i32;
561 ContentOperator::SetLineCap(cap)
562 }
563 b"j" => {
564 let join = pop_number(operand_stack).unwrap_or(0.0) as i32;
565 ContentOperator::SetLineJoin(join)
566 }
567 b"M" => {
568 let limit = pop_number(operand_stack).unwrap_or(10.0);
569 ContentOperator::SetMiterLimit(limit)
570 }
571 b"d" => {
572 let phase = pop_number(operand_stack).unwrap_or(0.0);
573 let pattern = pop_array(operand_stack);
574 ContentOperator::SetDashPattern(pattern, phase)
575 }
576 b"ri" => {
577 let intent = pop_name(operand_stack).unwrap_or_default();
578 ContentOperator::SetRenderingIntent(intent)
579 }
580 b"i" => {
581 let flatness = pop_number(operand_stack).unwrap_or(1.0);
582 ContentOperator::SetFlatness(flatness)
583 }
584 b"gs" => {
585 let name = pop_name(operand_stack).unwrap_or_default();
586 ContentOperator::SetGraphicsStateParams(name)
587 }
588
589 b"CS" => {
591 let name = pop_name(operand_stack).unwrap_or_default();
592 ContentOperator::SetStrokingColorSpace(name)
593 }
594 b"cs" => {
595 let name = pop_name(operand_stack).unwrap_or_default();
596 ContentOperator::SetColorSpace(name)
597 }
598 b"SC" | b"SCN" => {
599 let mut colors = Vec::new();
600 let mut pattern_name = None;
601
602 if let Some(Operand::Name(n)) = operand_stack.last() {
604 pattern_name = Some(n.clone());
605 operand_stack.pop();
606 }
607
608 while let Some(n) = pop_number(operand_stack) {
610 colors.insert(0, n);
611 }
612
613 if op_name == b"SCN" {
614 ContentOperator::SetStrokingColorN(colors, pattern_name)
615 } else {
616 ContentOperator::SetStrokingColor(colors)
617 }
618 }
619 b"sc" | b"scn" => {
620 let mut colors = Vec::new();
621 let mut pattern_name = None;
622
623 if let Some(Operand::Name(n)) = operand_stack.last() {
625 pattern_name = Some(n.clone());
626 operand_stack.pop();
627 }
628
629 while let Some(n) = pop_number(operand_stack) {
631 colors.insert(0, n);
632 }
633
634 if op_name == b"scn" {
635 ContentOperator::SetColorN(colors, pattern_name)
636 } else {
637 ContentOperator::SetColor(colors)
638 }
639 }
640 b"G" => {
641 let gray = pop_number(operand_stack).unwrap_or(0.0);
642 ContentOperator::SetStrokingGrayLevel(gray)
643 }
644 b"g" => {
645 let gray = pop_number(operand_stack).unwrap_or(0.0);
646 ContentOperator::SetGrayLevel(gray)
647 }
648 b"RG" => {
649 let b = pop_number(operand_stack).unwrap_or(0.0);
650 let g = pop_number(operand_stack).unwrap_or(0.0);
651 let r = pop_number(operand_stack).unwrap_or(0.0);
652 ContentOperator::SetStrokingRGBColor(r, g, b)
653 }
654 b"rg" => {
655 let b = pop_number(operand_stack).unwrap_or(0.0);
656 let g = pop_number(operand_stack).unwrap_or(0.0);
657 let r = pop_number(operand_stack).unwrap_or(0.0);
658 ContentOperator::SetRGBColor(r, g, b)
659 }
660 b"K" => {
661 let k = pop_number(operand_stack).unwrap_or(0.0);
662 let y = pop_number(operand_stack).unwrap_or(0.0);
663 let m = pop_number(operand_stack).unwrap_or(0.0);
664 let c = pop_number(operand_stack).unwrap_or(0.0);
665 ContentOperator::SetStrokingCMYKColor(c, m, y, k)
666 }
667 b"k" => {
668 let k = pop_number(operand_stack).unwrap_or(0.0);
669 let y = pop_number(operand_stack).unwrap_or(0.0);
670 let m = pop_number(operand_stack).unwrap_or(0.0);
671 let c = pop_number(operand_stack).unwrap_or(0.0);
672 ContentOperator::SetCMYKColor(c, m, y, k)
673 }
674
675 b"Do" => {
677 let name = pop_name(operand_stack).unwrap_or_default();
678 ContentOperator::PaintXObject(name)
679 }
680
681 b"sh" => {
683 let name = pop_name(operand_stack).unwrap_or_default();
684 ContentOperator::PaintShading(name)
685 }
686
687 b"BI" => {
689 operand_stack.clear();
690 ContentOperator::BeginInlineImage
691 }
692
693 b"BMC" => {
695 let tag = pop_name(operand_stack).unwrap_or_default();
696 ContentOperator::BeginMarkedContent(tag)
697 }
698 b"BDC" => {
699 let props = if let Some(Operand::Dictionary(d)) = operand_stack.pop() {
700 crate::parser::content_stream::MarkedContentProps::Dictionary(dict_to_pdf_dict(d))
701 } else if let Some(Operand::Name(n)) = operand_stack.pop() {
702 crate::parser::content_stream::MarkedContentProps::Name(n)
703 } else {
704 crate::parser::content_stream::MarkedContentProps::Name(String::new())
705 };
706 let tag = pop_name(operand_stack).unwrap_or_default();
707 ContentOperator::BeginMarkedContentWithProps(tag, props)
708 }
709 b"EMC" => {
710 operand_stack.clear();
711 ContentOperator::EndMarkedContent
712 }
713
714 _ => {
715 let operands: Vec<_> = operand_stack
717 .drain(..)
718 .map(convert_operand_to_content_stream)
719 .collect();
720 ContentOperator::Unknown(String::from_utf8_lossy(op_name).to_string(), operands)
721 }
722 };
723
724 Ok((input, operator))
725}
726
727fn pop_number(stack: &mut Vec<Operand>) -> Option<f64> {
729 stack.pop().and_then(|op| op.as_number())
730}
731
732fn pop_name(stack: &mut Vec<Operand>) -> Option<String> {
733 stack.pop().and_then(|op| match op {
734 Operand::Name(n) => Some(n),
735 _ => None,
736 })
737}
738
739fn pop_string(stack: &mut Vec<Operand>) -> Option<Vec<u8>> {
740 stack.pop().and_then(|op| match op {
741 Operand::String(s) => Some(s),
742 _ => None,
743 })
744}
745
746fn pop_array(stack: &mut Vec<Operand>) -> Vec<f64> {
747 if let Some(Operand::Array(arr)) = stack.pop() {
748 arr.into_iter().filter_map(|op| op.as_number()).collect()
749 } else {
750 Vec::new()
751 }
752}
753
754fn pop_text_array(stack: &mut Vec<Operand>) -> Vec<TextArrayElement> {
755 if let Some(Operand::Array(arr)) = stack.pop() {
756 arr.into_iter()
757 .map(|op| match op {
758 Operand::String(s) => TextArrayElement::Text(s),
759 Operand::Integer(i) => TextArrayElement::Spacing(i as f64),
760 Operand::Real(r) => TextArrayElement::Spacing(r),
761 _ => TextArrayElement::Spacing(0.0),
762 })
763 .collect()
764 } else {
765 Vec::new()
766 }
767}
768
769fn dict_to_pdf_dict(dict: HashMap<String, Operand>) -> crate::types::PdfDictionary {
770 let mut pdf_dict = crate::types::PdfDictionary::new();
771 for (key, value) in dict {
772 pdf_dict.insert(key, operand_to_pdf_value(value));
773 }
774 pdf_dict
775}
776
777fn operand_to_pdf_value(op: Operand) -> crate::types::PdfValue {
778 match op {
779 Operand::Integer(i) => crate::types::PdfValue::Integer(i),
780 Operand::Real(r) => crate::types::PdfValue::Real(r),
781 Operand::String(s) => {
782 crate::types::PdfValue::String(crate::types::primitive::PdfString::new_literal(s))
783 }
784 Operand::Name(n) => crate::types::PdfValue::Name(crate::types::primitive::PdfName::new(n)),
785 Operand::Boolean(b) => crate::types::PdfValue::Boolean(b),
786 Operand::Null => crate::types::PdfValue::Null,
787 Operand::Array(arr) => {
788 let pdf_arr: Vec<_> = arr.into_iter().map(operand_to_pdf_value).collect();
789 crate::types::PdfValue::Array(crate::types::object::PdfArray::from(pdf_arr))
790 }
791 Operand::Dictionary(dict) => crate::types::PdfValue::Dictionary(dict_to_pdf_dict(dict)),
792 }
793}
794
795pub fn parse_inline_image(input: &[u8]) -> IResult<&[u8], InlineImageInfo> {
797 let (input, _) = tag(b"BI")(input)?;
799 let (input, _) = multispace0(input)?;
800
801 let mut dict = HashMap::new();
803 let mut remaining = input;
804
805 loop {
806 if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
808 remaining = rest;
809 }
810
811 if remaining.starts_with(b"ID") && remaining.len() > 2 && remaining[2].is_ascii_whitespace()
813 {
814 remaining = &remaining[3..]; break;
816 }
817
818 if let Ok((rest, abbrev)) = parse_inline_image_key(remaining) {
820 remaining = rest;
821
822 if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
824 remaining = rest;
825 }
826
827 if let Ok((rest, value)) = parse_operand(remaining) {
829 dict.insert(expand_inline_image_key(&abbrev), value);
830 remaining = rest;
831 }
832 } else {
833 break;
834 }
835 }
836
837 let mut data_end = 0;
839 for i in 0..remaining.len() {
840 if remaining[i..].starts_with(b"EI") {
841 if i + 2 >= remaining.len() || remaining[i + 2].is_ascii_whitespace() {
843 data_end = i;
844 break;
845 }
846 }
847 }
848
849 let data = remaining[..data_end].to_vec();
850 let remaining = &remaining[data_end..];
851
852 let (remaining, _) = tag(b"EI")(remaining)?;
854
855 let width = dict.get("Width").and_then(|v| v.as_number()).unwrap_or(1.0) as u32;
857
858 let height = dict
859 .get("Height")
860 .and_then(|v| v.as_number())
861 .unwrap_or(1.0) as u32;
862
863 let color_space = dict
864 .get("ColorSpace")
865 .and_then(|v| v.as_name())
866 .unwrap_or("DeviceGray")
867 .to_string();
868
869 let bits_per_component = dict
870 .get("BitsPerComponent")
871 .and_then(|v| v.as_number())
872 .unwrap_or(8.0) as u8;
873
874 let filter = dict
875 .get("Filter")
876 .and_then(|v| v.as_name())
877 .map(|s| s.to_string());
878
879 let decode_params = if dict.contains_key("DecodeParms") {
880 let mut params = HashMap::new();
881 if let Some(Operand::Dictionary(d)) = dict.get("DecodeParms") {
882 for (k, v) in d {
883 params.insert(
884 k.clone(),
885 pdf_value_to_content_operand(operand_to_pdf_value(v.clone())),
886 );
887 }
888 }
889 Some(params)
890 } else {
891 None
892 };
893
894 Ok((
895 remaining,
896 InlineImageInfo {
897 width,
898 height,
899 color_space,
900 bits_per_component,
901 filter,
902 decode_params,
903 data,
904 },
905 ))
906}
907
908fn parse_inline_image_key(input: &[u8]) -> IResult<&[u8], String> {
909 let (input, key) = take_while1(|c: u8| c.is_ascii_alphabetic())(input)?;
910 Ok((input, String::from_utf8_lossy(key).to_string()))
911}
912
/// Expands an abbreviated inline-image dictionary key to its full name.
///
/// Keys that are not one of the known abbreviations are returned unchanged.
/// Matching is case-sensitive.
fn expand_inline_image_key(abbrev: &str) -> String {
    // (abbreviation, full key) pairs.
    const EXPANSIONS: [(&str, &str); 9] = [
        ("BPC", "BitsPerComponent"),
        ("CS", "ColorSpace"),
        ("D", "Decode"),
        ("DP", "DecodeParms"),
        ("F", "Filter"),
        ("H", "Height"),
        ("IM", "ImageMask"),
        ("I", "Interpolate"),
        ("W", "Width"),
    ];

    for (short, full) in EXPANSIONS.iter() {
        if *short == abbrev {
            return (*full).to_string();
        }
    }
    abbrev.to_string()
}
928
929fn convert_operand_to_content_stream(op: Operand) -> crate::parser::content_stream::Operand {
930 match op {
931 Operand::Integer(i) => crate::parser::content_stream::Operand::Integer(i),
932 Operand::Real(r) => crate::parser::content_stream::Operand::Real(r),
933 Operand::String(s) => crate::parser::content_stream::Operand::String(s),
934 Operand::Name(n) => crate::parser::content_stream::Operand::Name(n),
935 Operand::Boolean(b) => {
936 crate::parser::content_stream::Operand::Integer(if b { 1 } else { 0 })
938 }
939 Operand::Null => {
940 crate::parser::content_stream::Operand::Integer(0)
942 }
943 Operand::Array(arr) => crate::parser::content_stream::Operand::Array(
944 arr.into_iter()
945 .map(convert_operand_to_content_stream)
946 .collect(),
947 ),
948 Operand::Dictionary(dict) => crate::parser::content_stream::Operand::Dictionary(
949 dict.into_iter()
950 .map(|(k, v)| (k, convert_operand_to_content_stream(v)))
951 .collect(),
952 ),
953 }
954}
955
956fn pdf_value_to_content_operand(
957 val: crate::types::PdfValue,
958) -> crate::parser::content_stream::Operand {
959 match val {
960 crate::types::PdfValue::Integer(i) => crate::parser::content_stream::Operand::Integer(i),
961 crate::types::PdfValue::Real(r) => crate::parser::content_stream::Operand::Real(r),
962 crate::types::PdfValue::String(s) => {
963 crate::parser::content_stream::Operand::String(s.as_bytes().to_vec())
964 }
965 crate::types::PdfValue::Name(n) => {
966 crate::parser::content_stream::Operand::Name(n.without_slash().to_string())
967 }
968 crate::types::PdfValue::Boolean(_) => crate::parser::content_stream::Operand::Integer(1), crate::types::PdfValue::Null => crate::parser::content_stream::Operand::Integer(0), crate::types::PdfValue::Array(arr) => {
971 let operands: Vec<_> = arr
972 .iter()
973 .map(|v| pdf_value_to_content_operand(v.clone()))
974 .collect();
975 crate::parser::content_stream::Operand::Array(operands)
976 }
977 _ => crate::parser::content_stream::Operand::Integer(0), }
979}