Skip to main content

pdf_ast/parser/
content_operands.rs

1use crate::parser::content_stream::{ContentOperator, InlineImageInfo, TextArrayElement};
2use nom::{
3    branch::alt,
4    bytes::complete::{tag, take_while, take_while1},
5    character::complete::{char, digit1, multispace0, multispace1, one_of},
6    combinator::{map, opt, recognize},
7    multi::separated_list0,
8    sequence::{delimited, preceded, tuple},
9    IResult,
10};
11use std::collections::HashMap;
12
/// A single operand value as it appears in a PDF content stream.
#[derive(Debug, Clone, PartialEq)]
pub enum Operand {
    /// Integer number.
    Integer(i64),
    /// Real (floating point) number.
    Real(f64),
    /// Literal or hexadecimal string, stored as raw bytes.
    String(Vec<u8>),
    /// Name object, stored without its leading `/`.
    Name(String),
    /// Array of nested operands.
    Array(Vec<Operand>),
    /// Dictionary keyed by name.
    Dictionary(HashMap<String, Operand>),
    /// `true` / `false` keyword.
    Boolean(bool),
    /// `null` keyword.
    Null,
}

impl Operand {
    /// Returns the numeric value of an `Integer` or `Real` operand as `f64`,
    /// or `None` for every other variant.
    pub fn as_number(&self) -> Option<f64> {
        match *self {
            Operand::Integer(value) => Some(value as f64),
            Operand::Real(value) => Some(value),
            _ => None,
        }
    }

    /// Returns the raw bytes of a `String` operand, or `None` otherwise.
    pub fn as_string(&self) -> Option<&[u8]> {
        if let Operand::String(bytes) = self {
            Some(bytes.as_slice())
        } else {
            None
        }
    }

    /// Returns the text of a `Name` operand, or `None` otherwise.
    pub fn as_name(&self) -> Option<&str> {
        if let Operand::Name(name) = self {
            Some(name.as_str())
        } else {
            None
        }
    }
}
48
49/// Parse complete content stream with operands
50pub fn parse_content_stream(input: &[u8]) -> Vec<ContentOperator> {
51    let mut operators = Vec::new();
52    let mut operand_stack: Vec<Operand> = Vec::new();
53    let mut remaining = input;
54
55    while !remaining.is_empty() {
56        // Skip whitespace
57        if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
58            remaining = rest;
59        }
60
61        if remaining.is_empty() {
62            break;
63        }
64
65        // Try to parse operand
66        if let Ok((rest, operand)) = parse_operand(remaining) {
67            operand_stack.push(operand);
68            remaining = rest;
69        }
70        // Try to parse operator
71        else if let Ok((rest, op)) = parse_operator_with_operands(remaining, &mut operand_stack) {
72            operators.push(op);
73            remaining = rest;
74        }
75        // Skip unrecognized byte
76        else {
77            remaining = &remaining[1..];
78        }
79    }
80
81    operators
82}
83
84#[derive(Debug, Clone)]
85pub struct ContentOperatorWithOffset {
86    pub operator: ContentOperator,
87    pub offset: usize,
88}
89
90/// Parse content stream and capture operator byte offsets.
91pub fn parse_content_stream_with_offsets(input: &[u8]) -> Vec<ContentOperatorWithOffset> {
92    let mut operators = Vec::new();
93    let mut operand_stack: Vec<Operand> = Vec::new();
94    let mut remaining = input;
95    let base_len = input.len();
96
97    while !remaining.is_empty() {
98        if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
99            remaining = rest;
100        }
101        if remaining.is_empty() {
102            break;
103        }
104
105        if let Ok((rest, operand)) = parse_operand(remaining) {
106            operand_stack.push(operand);
107            remaining = rest;
108        } else if let Ok((rest, op)) = parse_operator_with_operands(remaining, &mut operand_stack) {
109            let offset = base_len.saturating_sub(remaining.len());
110            operators.push(ContentOperatorWithOffset {
111                operator: op,
112                offset,
113            });
114            remaining = rest;
115        } else {
116            remaining = &remaining[1..];
117        }
118    }
119
120    operators
121}
122
123/// Parse a single operand
124fn parse_operand(input: &[u8]) -> IResult<&[u8], Operand> {
125    alt((
126        map(parse_number, |n| match n {
127            Number::Integer(i) => Operand::Integer(i),
128            Number::Real(r) => Operand::Real(r),
129        }),
130        map(parse_string, Operand::String),
131        map(parse_hex_string, Operand::String),
132        map(parse_name, Operand::Name),
133        map(parse_array, Operand::Array),
134        map(parse_dictionary, Operand::Dictionary),
135        map(tag(b"true"), |_| Operand::Boolean(true)),
136        map(tag(b"false"), |_| Operand::Boolean(false)),
137        map(tag(b"null"), |_| Operand::Null),
138    ))(input)
139}
140
141#[derive(Debug)]
142enum Number {
143    Integer(i64),
144    Real(f64),
145}
146
147fn parse_number(input: &[u8]) -> IResult<&[u8], Number> {
148    let (input, sign) = opt(one_of("+-"))(input)?;
149    let (input, num_str) = recognize(tuple((digit1, opt(tuple((char('.'), digit1))))))(input)?;
150
151    let num_string = std::str::from_utf8(num_str).map_err(|_| {
152        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
153    })?;
154
155    let sign_mult = if sign == Some('-') { -1.0 } else { 1.0 };
156
157    if num_string.contains('.') {
158        let value: f64 = num_string.parse().map_err(|_| {
159            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
160        })?;
161        Ok((input, Number::Real(value * sign_mult)))
162    } else {
163        let value: i64 = num_string.parse().map_err(|_| {
164            nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Digit))
165        })?;
166        Ok((input, Number::Integer((value as f64 * sign_mult) as i64)))
167    }
168}
169
170fn parse_string(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
171    let (input, _) = char('(')(input)?;
172    let mut result = Vec::new();
173    let mut remaining = input;
174    let mut paren_depth = 1;
175
176    while paren_depth > 0 && !remaining.is_empty() {
177        match remaining[0] {
178            b'(' => {
179                paren_depth += 1;
180                result.push(b'(');
181                remaining = &remaining[1..];
182            }
183            b')' => {
184                paren_depth -= 1;
185                if paren_depth > 0 {
186                    result.push(b')');
187                }
188                remaining = &remaining[1..];
189            }
190            b'\\' if remaining.len() > 1 => {
191                // Handle escape sequences
192                match remaining[1] {
193                    b'n' => result.push(b'\n'),
194                    b'r' => result.push(b'\r'),
195                    b't' => result.push(b'\t'),
196                    b'b' => result.push(b'\x08'),
197                    b'f' => result.push(b'\x0C'),
198                    b'(' => result.push(b'('),
199                    b')' => result.push(b')'),
200                    b'\\' => result.push(b'\\'),
201                    c if c.is_ascii_digit() => {
202                        // Octal escape
203                        let mut octal = vec![c];
204                        let mut idx = 2;
205                        while idx < remaining.len() && idx < 4 && remaining[idx].is_ascii_digit() {
206                            octal.push(remaining[idx]);
207                            idx += 1;
208                        }
209                        if let Ok(s) = std::str::from_utf8(&octal) {
210                            if let Ok(n) = u8::from_str_radix(s, 8) {
211                                result.push(n);
212                            }
213                        }
214                        remaining = &remaining[idx..];
215                        continue;
216                    }
217                    _ => {
218                        result.push(remaining[1]);
219                    }
220                }
221                remaining = &remaining[2..];
222            }
223            c => {
224                result.push(c);
225                remaining = &remaining[1..];
226            }
227        }
228    }
229
230    Ok((remaining, result))
231}
232
233fn parse_hex_string(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
234    let (input, _) = char('<')(input)?;
235    let (input, hex) = take_while(|c: u8| c.is_ascii_hexdigit() || c.is_ascii_whitespace())(input)?;
236    let (input, _) = char('>')(input)?;
237
238    let hex_clean: Vec<u8> = hex
239        .iter()
240        .filter(|c| c.is_ascii_hexdigit())
241        .copied()
242        .collect();
243
244    let mut result = Vec::new();
245    for chunk in hex_clean.chunks(2) {
246        let high = chunk[0];
247        let low = if chunk.len() > 1 { chunk[1] } else { b'0' };
248
249        let h = if high.is_ascii_digit() {
250            high - b'0'
251        } else {
252            (high.to_ascii_uppercase() - b'A') + 10
253        };
254        let l = if low.is_ascii_digit() {
255            low - b'0'
256        } else {
257            (low.to_ascii_uppercase() - b'A') + 10
258        };
259
260        result.push((h << 4) | l);
261    }
262
263    Ok((input, result))
264}
265
266fn parse_name(input: &[u8]) -> IResult<&[u8], String> {
267    let (input, _) = char('/')(input)?;
268    let (input, name) = take_while(|c: u8| {
269        !c.is_ascii_whitespace()
270            && c != b'/'
271            && c != b'['
272            && c != b']'
273            && c != b'('
274            && c != b')'
275            && c != b'<'
276            && c != b'>'
277    })(input)?;
278
279    // Decode # escapes
280    let mut result = String::new();
281    let mut i = 0;
282    let name_bytes = name;
283
284    while i < name_bytes.len() {
285        if name_bytes[i] == b'#' && i + 2 < name_bytes.len() {
286            if let Ok(hex) = std::str::from_utf8(&name_bytes[i + 1..i + 3]) {
287                if let Ok(byte) = u8::from_str_radix(hex, 16) {
288                    result.push(byte as char);
289                    i += 3;
290                    continue;
291                }
292            }
293        }
294        result.push(name_bytes[i] as char);
295        i += 1;
296    }
297
298    Ok((input, result))
299}
300
301fn parse_array(input: &[u8]) -> IResult<&[u8], Vec<Operand>> {
302    delimited(
303        preceded(char('['), multispace0),
304        separated_list0(multispace1, parse_operand),
305        preceded(multispace0, char(']')),
306    )(input)
307}
308
309fn parse_dictionary(input: &[u8]) -> IResult<&[u8], HashMap<String, Operand>> {
310    let (input, _) = preceded(tag(b"<<"), multispace0)(input)?;
311    let mut dict = HashMap::new();
312    let mut remaining = input;
313
314    loop {
315        // Skip whitespace
316        if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
317            remaining = rest;
318        }
319
320        // Check for end
321        if let Ok((rest, _)) = tag::<_, _, nom::error::Error<_>>(b">>")(remaining) {
322            return Ok((rest, dict));
323        }
324
325        // Parse name
326        if let Ok((rest, name)) = parse_name(remaining) {
327            remaining = rest;
328
329            // Skip whitespace
330            if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
331                remaining = rest;
332            }
333
334            // Parse value
335            if let Ok((rest, value)) = parse_operand(remaining) {
336                dict.insert(name, value);
337                remaining = rest;
338            } else {
339                break;
340            }
341        } else {
342            break;
343        }
344    }
345
346    Err(nom::Err::Error(nom::error::Error::new(
347        input,
348        nom::error::ErrorKind::Tag,
349    )))
350}
351
352/// Parse operator with its operands from the stack
353fn parse_operator_with_operands<'a>(
354    input: &'a [u8],
355    operand_stack: &mut Vec<Operand>,
356) -> IResult<&'a [u8], ContentOperator> {
357    let (input, op_name) =
358        take_while1(|c: u8| c.is_ascii_alphabetic() || c == b'*' || c == b'\'' || c == b'"')(
359            input,
360        )?;
361
362    let operator = match op_name {
363        // Text operators
364        b"BT" => {
365            operand_stack.clear();
366            ContentOperator::BeginText
367        }
368        b"ET" => {
369            operand_stack.clear();
370            ContentOperator::EndText
371        }
372        b"Tc" => {
373            let spacing = pop_number(operand_stack).unwrap_or(0.0);
374            ContentOperator::SetCharSpace(spacing)
375        }
376        b"Tw" => {
377            let spacing = pop_number(operand_stack).unwrap_or(0.0);
378            ContentOperator::SetWordSpace(spacing)
379        }
380        b"Tz" => {
381            let scale = pop_number(operand_stack).unwrap_or(100.0);
382            ContentOperator::SetHorizontalScale(scale)
383        }
384        b"TL" => {
385            let leading = pop_number(operand_stack).unwrap_or(0.0);
386            ContentOperator::SetLeading(leading)
387        }
388        b"Tf" => {
389            let size = pop_number(operand_stack).unwrap_or(12.0);
390            let font = pop_name(operand_stack).unwrap_or_default();
391            ContentOperator::SetFont(font, size)
392        }
393        b"Tr" => {
394            let mode = pop_number(operand_stack).unwrap_or(0.0) as i32;
395            ContentOperator::SetTextRenderMode(mode)
396        }
397        b"Ts" => {
398            let rise = pop_number(operand_stack).unwrap_or(0.0);
399            ContentOperator::SetTextRise(rise)
400        }
401        b"Td" => {
402            let ty = pop_number(operand_stack).unwrap_or(0.0);
403            let tx = pop_number(operand_stack).unwrap_or(0.0);
404            ContentOperator::MoveText(tx, ty)
405        }
406        b"TD" => {
407            let ty = pop_number(operand_stack).unwrap_or(0.0);
408            let tx = pop_number(operand_stack).unwrap_or(0.0);
409            ContentOperator::MoveText(tx, ty)
410        }
411        b"Tm" => {
412            let f = pop_number(operand_stack).unwrap_or(0.0);
413            let e = pop_number(operand_stack).unwrap_or(0.0);
414            let d = pop_number(operand_stack).unwrap_or(1.0);
415            let c = pop_number(operand_stack).unwrap_or(0.0);
416            let b = pop_number(operand_stack).unwrap_or(0.0);
417            let a = pop_number(operand_stack).unwrap_or(1.0);
418            ContentOperator::SetTextMatrix(a, b, c, d, e, f)
419        }
420        b"T*" => {
421            operand_stack.clear();
422            ContentOperator::MoveTextNextLine
423        }
424        b"Tj" => {
425            let text = pop_string(operand_stack).unwrap_or_default();
426            ContentOperator::ShowText(text)
427        }
428        b"TJ" => {
429            let array = pop_text_array(operand_stack);
430            ContentOperator::ShowTextArray(array)
431        }
432        b"'" => {
433            let text = pop_string(operand_stack).unwrap_or_default();
434            ContentOperator::ShowTextNextLine(text)
435        }
436        b"\"" => {
437            let text = pop_string(operand_stack).unwrap_or_default();
438            let tc = pop_number(operand_stack).unwrap_or(0.0);
439            let tw = pop_number(operand_stack).unwrap_or(0.0);
440            ContentOperator::ShowTextWithSpacing(tw, tc, text)
441        }
442
443        // Graphics operators
444        b"m" => {
445            let y = pop_number(operand_stack).unwrap_or(0.0);
446            let x = pop_number(operand_stack).unwrap_or(0.0);
447            ContentOperator::MoveTo(x, y)
448        }
449        b"l" => {
450            let y = pop_number(operand_stack).unwrap_or(0.0);
451            let x = pop_number(operand_stack).unwrap_or(0.0);
452            ContentOperator::LineTo(x, y)
453        }
454        b"c" => {
455            let y3 = pop_number(operand_stack).unwrap_or(0.0);
456            let x3 = pop_number(operand_stack).unwrap_or(0.0);
457            let y2 = pop_number(operand_stack).unwrap_or(0.0);
458            let x2 = pop_number(operand_stack).unwrap_or(0.0);
459            let y1 = pop_number(operand_stack).unwrap_or(0.0);
460            let x1 = pop_number(operand_stack).unwrap_or(0.0);
461            ContentOperator::CurveTo(x1, y1, x2, y2, x3, y3)
462        }
463        b"v" => {
464            let y3 = pop_number(operand_stack).unwrap_or(0.0);
465            let x3 = pop_number(operand_stack).unwrap_or(0.0);
466            let y2 = pop_number(operand_stack).unwrap_or(0.0);
467            let x2 = pop_number(operand_stack).unwrap_or(0.0);
468            ContentOperator::CurveToV(x2, y2, x3, y3)
469        }
470        b"y" => {
471            let y3 = pop_number(operand_stack).unwrap_or(0.0);
472            let x3 = pop_number(operand_stack).unwrap_or(0.0);
473            let y1 = pop_number(operand_stack).unwrap_or(0.0);
474            let x1 = pop_number(operand_stack).unwrap_or(0.0);
475            ContentOperator::CurveToY(x1, y1, x3, y3)
476        }
477        b"h" => {
478            operand_stack.clear();
479            ContentOperator::ClosePath
480        }
481        b"re" => {
482            let h = pop_number(operand_stack).unwrap_or(0.0);
483            let w = pop_number(operand_stack).unwrap_or(0.0);
484            let y = pop_number(operand_stack).unwrap_or(0.0);
485            let x = pop_number(operand_stack).unwrap_or(0.0);
486            ContentOperator::Rectangle(x, y, w, h)
487        }
488
489        // Path painting
490        b"S" => {
491            operand_stack.clear();
492            ContentOperator::Stroke
493        }
494        b"s" => {
495            operand_stack.clear();
496            ContentOperator::CloseAndStroke
497        }
498        b"f" | b"F" => {
499            operand_stack.clear();
500            ContentOperator::Fill
501        }
502        b"f*" => {
503            operand_stack.clear();
504            ContentOperator::FillEvenOdd
505        }
506        b"B" => {
507            operand_stack.clear();
508            ContentOperator::FillAndStroke
509        }
510        b"B*" => {
511            operand_stack.clear();
512            ContentOperator::FillAndStrokeEvenOdd
513        }
514        b"b" => {
515            operand_stack.clear();
516            ContentOperator::CloseFillAndStroke
517        }
518        b"b*" => {
519            operand_stack.clear();
520            ContentOperator::CloseFillAndStrokeEvenOdd
521        }
522        b"n" => {
523            operand_stack.clear();
524            ContentOperator::EndPath
525        }
526
527        // Clipping
528        b"W" => {
529            operand_stack.clear();
530            ContentOperator::Clip
531        }
532        b"W*" => {
533            operand_stack.clear();
534            ContentOperator::ClipEvenOdd
535        }
536
537        // Graphics state
538        b"q" => {
539            operand_stack.clear();
540            ContentOperator::Save
541        }
542        b"Q" => {
543            operand_stack.clear();
544            ContentOperator::Restore
545        }
546        b"cm" => {
547            let f = pop_number(operand_stack).unwrap_or(0.0);
548            let e = pop_number(operand_stack).unwrap_or(0.0);
549            let d = pop_number(operand_stack).unwrap_or(1.0);
550            let c = pop_number(operand_stack).unwrap_or(0.0);
551            let b = pop_number(operand_stack).unwrap_or(0.0);
552            let a = pop_number(operand_stack).unwrap_or(1.0);
553            ContentOperator::SetMatrix(a, b, c, d, e, f)
554        }
555        b"w" => {
556            let width = pop_number(operand_stack).unwrap_or(1.0);
557            ContentOperator::SetLineWidth(width)
558        }
559        b"J" => {
560            let cap = pop_number(operand_stack).unwrap_or(0.0) as i32;
561            ContentOperator::SetLineCap(cap)
562        }
563        b"j" => {
564            let join = pop_number(operand_stack).unwrap_or(0.0) as i32;
565            ContentOperator::SetLineJoin(join)
566        }
567        b"M" => {
568            let limit = pop_number(operand_stack).unwrap_or(10.0);
569            ContentOperator::SetMiterLimit(limit)
570        }
571        b"d" => {
572            let phase = pop_number(operand_stack).unwrap_or(0.0);
573            let pattern = pop_array(operand_stack);
574            ContentOperator::SetDashPattern(pattern, phase)
575        }
576        b"ri" => {
577            let intent = pop_name(operand_stack).unwrap_or_default();
578            ContentOperator::SetRenderingIntent(intent)
579        }
580        b"i" => {
581            let flatness = pop_number(operand_stack).unwrap_or(1.0);
582            ContentOperator::SetFlatness(flatness)
583        }
584        b"gs" => {
585            let name = pop_name(operand_stack).unwrap_or_default();
586            ContentOperator::SetGraphicsStateParams(name)
587        }
588
589        // Color
590        b"CS" => {
591            let name = pop_name(operand_stack).unwrap_or_default();
592            ContentOperator::SetStrokingColorSpace(name)
593        }
594        b"cs" => {
595            let name = pop_name(operand_stack).unwrap_or_default();
596            ContentOperator::SetColorSpace(name)
597        }
598        b"SC" | b"SCN" => {
599            let mut colors = Vec::new();
600            let mut pattern_name = None;
601
602            // Check if last operand is a name (pattern)
603            if let Some(Operand::Name(n)) = operand_stack.last() {
604                pattern_name = Some(n.clone());
605                operand_stack.pop();
606            }
607
608            // Collect color components
609            while let Some(n) = pop_number(operand_stack) {
610                colors.insert(0, n);
611            }
612
613            if op_name == b"SCN" {
614                ContentOperator::SetStrokingColorN(colors, pattern_name)
615            } else {
616                ContentOperator::SetStrokingColor(colors)
617            }
618        }
619        b"sc" | b"scn" => {
620            let mut colors = Vec::new();
621            let mut pattern_name = None;
622
623            // Check if last operand is a name (pattern)
624            if let Some(Operand::Name(n)) = operand_stack.last() {
625                pattern_name = Some(n.clone());
626                operand_stack.pop();
627            }
628
629            // Collect color components
630            while let Some(n) = pop_number(operand_stack) {
631                colors.insert(0, n);
632            }
633
634            if op_name == b"scn" {
635                ContentOperator::SetColorN(colors, pattern_name)
636            } else {
637                ContentOperator::SetColor(colors)
638            }
639        }
640        b"G" => {
641            let gray = pop_number(operand_stack).unwrap_or(0.0);
642            ContentOperator::SetStrokingGrayLevel(gray)
643        }
644        b"g" => {
645            let gray = pop_number(operand_stack).unwrap_or(0.0);
646            ContentOperator::SetGrayLevel(gray)
647        }
648        b"RG" => {
649            let b = pop_number(operand_stack).unwrap_or(0.0);
650            let g = pop_number(operand_stack).unwrap_or(0.0);
651            let r = pop_number(operand_stack).unwrap_or(0.0);
652            ContentOperator::SetStrokingRGBColor(r, g, b)
653        }
654        b"rg" => {
655            let b = pop_number(operand_stack).unwrap_or(0.0);
656            let g = pop_number(operand_stack).unwrap_or(0.0);
657            let r = pop_number(operand_stack).unwrap_or(0.0);
658            ContentOperator::SetRGBColor(r, g, b)
659        }
660        b"K" => {
661            let k = pop_number(operand_stack).unwrap_or(0.0);
662            let y = pop_number(operand_stack).unwrap_or(0.0);
663            let m = pop_number(operand_stack).unwrap_or(0.0);
664            let c = pop_number(operand_stack).unwrap_or(0.0);
665            ContentOperator::SetStrokingCMYKColor(c, m, y, k)
666        }
667        b"k" => {
668            let k = pop_number(operand_stack).unwrap_or(0.0);
669            let y = pop_number(operand_stack).unwrap_or(0.0);
670            let m = pop_number(operand_stack).unwrap_or(0.0);
671            let c = pop_number(operand_stack).unwrap_or(0.0);
672            ContentOperator::SetCMYKColor(c, m, y, k)
673        }
674
675        // XObject
676        b"Do" => {
677            let name = pop_name(operand_stack).unwrap_or_default();
678            ContentOperator::PaintXObject(name)
679        }
680
681        // Shading
682        b"sh" => {
683            let name = pop_name(operand_stack).unwrap_or_default();
684            ContentOperator::PaintShading(name)
685        }
686
687        // Inline images
688        b"BI" => {
689            operand_stack.clear();
690            ContentOperator::BeginInlineImage
691        }
692
693        // Marked content
694        b"BMC" => {
695            let tag = pop_name(operand_stack).unwrap_or_default();
696            ContentOperator::BeginMarkedContent(tag)
697        }
698        b"BDC" => {
699            let props = if let Some(Operand::Dictionary(d)) = operand_stack.pop() {
700                crate::parser::content_stream::MarkedContentProps::Dictionary(dict_to_pdf_dict(d))
701            } else if let Some(Operand::Name(n)) = operand_stack.pop() {
702                crate::parser::content_stream::MarkedContentProps::Name(n)
703            } else {
704                crate::parser::content_stream::MarkedContentProps::Name(String::new())
705            };
706            let tag = pop_name(operand_stack).unwrap_or_default();
707            ContentOperator::BeginMarkedContentWithProps(tag, props)
708        }
709        b"EMC" => {
710            operand_stack.clear();
711            ContentOperator::EndMarkedContent
712        }
713
714        _ => {
715            // Unknown operator - collect operands
716            let operands: Vec<_> = operand_stack
717                .drain(..)
718                .map(convert_operand_to_content_stream)
719                .collect();
720            ContentOperator::Unknown(String::from_utf8_lossy(op_name).to_string(), operands)
721        }
722    };
723
724    Ok((input, operator))
725}
726
727// Helper functions to pop operands
728fn pop_number(stack: &mut Vec<Operand>) -> Option<f64> {
729    stack.pop().and_then(|op| op.as_number())
730}
731
732fn pop_name(stack: &mut Vec<Operand>) -> Option<String> {
733    stack.pop().and_then(|op| match op {
734        Operand::Name(n) => Some(n),
735        _ => None,
736    })
737}
738
739fn pop_string(stack: &mut Vec<Operand>) -> Option<Vec<u8>> {
740    stack.pop().and_then(|op| match op {
741        Operand::String(s) => Some(s),
742        _ => None,
743    })
744}
745
746fn pop_array(stack: &mut Vec<Operand>) -> Vec<f64> {
747    if let Some(Operand::Array(arr)) = stack.pop() {
748        arr.into_iter().filter_map(|op| op.as_number()).collect()
749    } else {
750        Vec::new()
751    }
752}
753
754fn pop_text_array(stack: &mut Vec<Operand>) -> Vec<TextArrayElement> {
755    if let Some(Operand::Array(arr)) = stack.pop() {
756        arr.into_iter()
757            .map(|op| match op {
758                Operand::String(s) => TextArrayElement::Text(s),
759                Operand::Integer(i) => TextArrayElement::Spacing(i as f64),
760                Operand::Real(r) => TextArrayElement::Spacing(r),
761                _ => TextArrayElement::Spacing(0.0),
762            })
763            .collect()
764    } else {
765        Vec::new()
766    }
767}
768
769fn dict_to_pdf_dict(dict: HashMap<String, Operand>) -> crate::types::PdfDictionary {
770    let mut pdf_dict = crate::types::PdfDictionary::new();
771    for (key, value) in dict {
772        pdf_dict.insert(key, operand_to_pdf_value(value));
773    }
774    pdf_dict
775}
776
777fn operand_to_pdf_value(op: Operand) -> crate::types::PdfValue {
778    match op {
779        Operand::Integer(i) => crate::types::PdfValue::Integer(i),
780        Operand::Real(r) => crate::types::PdfValue::Real(r),
781        Operand::String(s) => {
782            crate::types::PdfValue::String(crate::types::primitive::PdfString::new_literal(s))
783        }
784        Operand::Name(n) => crate::types::PdfValue::Name(crate::types::primitive::PdfName::new(n)),
785        Operand::Boolean(b) => crate::types::PdfValue::Boolean(b),
786        Operand::Null => crate::types::PdfValue::Null,
787        Operand::Array(arr) => {
788            let pdf_arr: Vec<_> = arr.into_iter().map(operand_to_pdf_value).collect();
789            crate::types::PdfValue::Array(crate::types::object::PdfArray::from(pdf_arr))
790        }
791        Operand::Dictionary(dict) => crate::types::PdfValue::Dictionary(dict_to_pdf_dict(dict)),
792    }
793}
794
795/// Parse inline image with dictionary and data
796pub fn parse_inline_image(input: &[u8]) -> IResult<&[u8], InlineImageInfo> {
797    // Skip BI
798    let (input, _) = tag(b"BI")(input)?;
799    let (input, _) = multispace0(input)?;
800
801    // Parse inline image dictionary
802    let mut dict = HashMap::new();
803    let mut remaining = input;
804
805    loop {
806        // Skip whitespace
807        if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
808            remaining = rest;
809        }
810
811        // Check for ID (start of image data)
812        if remaining.starts_with(b"ID") && remaining.len() > 2 && remaining[2].is_ascii_whitespace()
813        {
814            remaining = &remaining[3..]; // Skip "ID" and whitespace
815            break;
816        }
817
818        // Parse abbreviated name
819        if let Ok((rest, abbrev)) = parse_inline_image_key(remaining) {
820            remaining = rest;
821
822            // Skip whitespace
823            if let Ok((rest, _)) = multispace0::<_, nom::error::Error<_>>(remaining) {
824                remaining = rest;
825            }
826
827            // Parse value
828            if let Ok((rest, value)) = parse_operand(remaining) {
829                dict.insert(expand_inline_image_key(&abbrev), value);
830                remaining = rest;
831            }
832        } else {
833            break;
834        }
835    }
836
837    // Find EI to determine data length
838    let mut data_end = 0;
839    for i in 0..remaining.len() {
840        if remaining[i..].starts_with(b"EI") {
841            // Check if EI is followed by whitespace or end
842            if i + 2 >= remaining.len() || remaining[i + 2].is_ascii_whitespace() {
843                data_end = i;
844                break;
845            }
846        }
847    }
848
849    let data = remaining[..data_end].to_vec();
850    let remaining = &remaining[data_end..];
851
852    // Skip EI
853    let (remaining, _) = tag(b"EI")(remaining)?;
854
855    // Build InlineImageInfo
856    let width = dict.get("Width").and_then(|v| v.as_number()).unwrap_or(1.0) as u32;
857
858    let height = dict
859        .get("Height")
860        .and_then(|v| v.as_number())
861        .unwrap_or(1.0) as u32;
862
863    let color_space = dict
864        .get("ColorSpace")
865        .and_then(|v| v.as_name())
866        .unwrap_or("DeviceGray")
867        .to_string();
868
869    let bits_per_component = dict
870        .get("BitsPerComponent")
871        .and_then(|v| v.as_number())
872        .unwrap_or(8.0) as u8;
873
874    let filter = dict
875        .get("Filter")
876        .and_then(|v| v.as_name())
877        .map(|s| s.to_string());
878
879    let decode_params = if dict.contains_key("DecodeParms") {
880        let mut params = HashMap::new();
881        if let Some(Operand::Dictionary(d)) = dict.get("DecodeParms") {
882            for (k, v) in d {
883                params.insert(
884                    k.clone(),
885                    pdf_value_to_content_operand(operand_to_pdf_value(v.clone())),
886                );
887            }
888        }
889        Some(params)
890    } else {
891        None
892    };
893
894    Ok((
895        remaining,
896        InlineImageInfo {
897            width,
898            height,
899            color_space,
900            bits_per_component,
901            filter,
902            decode_params,
903            data,
904        },
905    ))
906}
907
908fn parse_inline_image_key(input: &[u8]) -> IResult<&[u8], String> {
909    let (input, key) = take_while1(|c: u8| c.is_ascii_alphabetic())(input)?;
910    Ok((input, String::from_utf8_lossy(key).to_string()))
911}
912
/// Expands an abbreviated inline-image key to its full name.
///
/// Keys that are not in the abbreviation table are returned unchanged. The
/// match is case-sensitive.
fn expand_inline_image_key(abbrev: &str) -> String {
    const EXPANSIONS: [(&str, &str); 9] = [
        ("BPC", "BitsPerComponent"),
        ("CS", "ColorSpace"),
        ("D", "Decode"),
        ("DP", "DecodeParms"),
        ("F", "Filter"),
        ("H", "Height"),
        ("IM", "ImageMask"),
        ("I", "Interpolate"),
        ("W", "Width"),
    ];

    EXPANSIONS
        .iter()
        .find(|&&(short, _)| short == abbrev)
        .map_or(abbrev, |&(_, full)| full)
        .to_string()
}
928
929fn convert_operand_to_content_stream(op: Operand) -> crate::parser::content_stream::Operand {
930    match op {
931        Operand::Integer(i) => crate::parser::content_stream::Operand::Integer(i),
932        Operand::Real(r) => crate::parser::content_stream::Operand::Real(r),
933        Operand::String(s) => crate::parser::content_stream::Operand::String(s),
934        Operand::Name(n) => crate::parser::content_stream::Operand::Name(n),
935        Operand::Boolean(b) => {
936            // Convert boolean to integer (0 or 1)
937            crate::parser::content_stream::Operand::Integer(if b { 1 } else { 0 })
938        }
939        Operand::Null => {
940            // Convert null to integer 0
941            crate::parser::content_stream::Operand::Integer(0)
942        }
943        Operand::Array(arr) => crate::parser::content_stream::Operand::Array(
944            arr.into_iter()
945                .map(convert_operand_to_content_stream)
946                .collect(),
947        ),
948        Operand::Dictionary(dict) => crate::parser::content_stream::Operand::Dictionary(
949            dict.into_iter()
950                .map(|(k, v)| (k, convert_operand_to_content_stream(v)))
951                .collect(),
952        ),
953    }
954}
955
956fn pdf_value_to_content_operand(
957    val: crate::types::PdfValue,
958) -> crate::parser::content_stream::Operand {
959    match val {
960        crate::types::PdfValue::Integer(i) => crate::parser::content_stream::Operand::Integer(i),
961        crate::types::PdfValue::Real(r) => crate::parser::content_stream::Operand::Real(r),
962        crate::types::PdfValue::String(s) => {
963            crate::parser::content_stream::Operand::String(s.as_bytes().to_vec())
964        }
965        crate::types::PdfValue::Name(n) => {
966            crate::parser::content_stream::Operand::Name(n.without_slash().to_string())
967        }
968        crate::types::PdfValue::Boolean(_) => crate::parser::content_stream::Operand::Integer(1), // Simplified
969        crate::types::PdfValue::Null => crate::parser::content_stream::Operand::Integer(0), // Simplified
970        crate::types::PdfValue::Array(arr) => {
971            let operands: Vec<_> = arr
972                .iter()
973                .map(|v| pdf_value_to_content_operand(v.clone()))
974                .collect();
975            crate::parser::content_stream::Operand::Array(operands)
976        }
977        _ => crate::parser::content_stream::Operand::Integer(0), // Default for unsupported types
978    }
979}