Skip to main content

oxidize_pdf/parser/
content.rs

1//! PDF Content Stream Parser - Complete support for PDF graphics operators
2//!
3//! This module implements comprehensive parsing of PDF content streams according to the PDF specification.
4//! Content streams contain the actual drawing instructions (operators) that render text, graphics, and images
5//! on PDF pages.
6//!
7//! # Overview
8//!
9//! Content streams are sequences of PDF operators that describe:
10//! - Text positioning and rendering
11//! - Path construction and painting
12//! - Color and graphics state management
13//! - Image and XObject placement
14//! - Coordinate transformations
15//!
16//! # Architecture
17//!
18//! The parser is divided into two main components:
19//! - `ContentTokenizer`: Low-level tokenization of content stream bytes
20//! - `ContentParser`: High-level parsing of tokens into structured operations
21//!
22//! # Example
23//!
24//! ```rust,no_run
25//! use oxidize_pdf::parser::content::{ContentParser, ContentOperation};
26//!
27//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
28//! // Parse a content stream
29//! let content_stream = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
30//! let operations = ContentParser::parse_content(content_stream)?;
31//!
32//! // Process operations
33//! for op in operations {
34//!     match op {
35//!         ContentOperation::BeginText => println!("Start text object"),
36//!         ContentOperation::SetFont(name, size) => println!("Font: {} at {}", name, size),
37//!         ContentOperation::ShowText(text) => println!("Text: {:?}", text),
38//!         _ => {}
39//!     }
40//! }
41//! # Ok(())
42//! # }
43//! ```
44//!
45//! # Supported Operators
46//!
47//! This parser supports all standard PDF operators including:
48//! - Text operators (BT, ET, Tj, TJ, Tf, Td, etc.)
49//! - Graphics state operators (q, Q, cm, w, J, etc.)
50//! - Path construction operators (m, l, c, re, h)
51//! - Path painting operators (S, f, B, n, etc.)
52//! - Color operators (g, rg, k, cs, scn, etc.)
53//! - XObject operators (Do)
54//! - Marked content operators (BMC, BDC, EMC, etc.)
55
56use super::{ParseError, ParseResult};
57use crate::objects::Object;
58use std::collections::HashMap;
59
60/// Represents a single operator in a PDF content stream.
61///
62/// Each variant corresponds to a specific PDF operator and carries the associated
63/// operands. These operations form a complete instruction set for rendering PDF content.
64///
65/// # Categories
66///
67/// Operations are grouped into several categories:
68/// - **Text Object**: BeginText, EndText
69/// - **Text State**: Font, spacing, scaling, rendering mode
70/// - **Text Positioning**: Matrix transforms, moves, line advances
71/// - **Text Showing**: Display text with various formatting
72/// - **Graphics State**: Save/restore, transforms, line properties
73/// - **Path Construction**: Move, line, curve, rectangle operations
74/// - **Path Painting**: Stroke, fill, clipping operations
75/// - **Color**: RGB, CMYK, grayscale, and color space operations
76/// - **XObject**: External graphics and form placement
77/// - **Marked Content**: Semantic tagging for accessibility
78///
79/// # Example
80///
81/// ```rust
82/// use oxidize_pdf::parser::content::{ContentOperation};
83///
84/// // Text operation
85/// let op1 = ContentOperation::ShowText(b"Hello".to_vec());
86///
87/// // Graphics operation
88/// let op2 = ContentOperation::SetLineWidth(2.0);
89///
90/// // Path operation
91/// let op3 = ContentOperation::Rectangle(10.0, 10.0, 100.0, 50.0);
92/// ```
93#[derive(Debug, Clone, PartialEq)]
94pub enum ContentOperation {
95    // Text object operators
96    /// Begin a text object (BT operator).
97    /// All text showing operations must occur within a text object.
98    BeginText,
99
100    /// End a text object (ET operator).
101    /// Closes the current text object started with BeginText.
102    EndText,
103
104    // Text state operators
105    /// Set character spacing (Tc operator).
106    /// Additional space between characters in unscaled text units.
107    SetCharSpacing(f32),
108
109    /// Set word spacing (Tw operator).
110    /// Additional space for ASCII space character (0x20) in unscaled text units.
111    SetWordSpacing(f32),
112
113    /// Set horizontal text scaling (Tz operator).
114    /// Percentage of normal width (100 = normal).
115    SetHorizontalScaling(f32),
116
117    /// Set text leading (TL operator).
118    /// Vertical distance between baselines for T* operator.
119    SetLeading(f32),
120
121    /// Set font and size (Tf operator).
122    /// Font name must match a key in the Resources/Font dictionary.
123    SetFont(String, f32),
124
125    /// Set text rendering mode (Tr operator).
126    /// 0=fill, 1=stroke, 2=fill+stroke, 3=invisible, 4=fill+clip, 5=stroke+clip, 6=fill+stroke+clip, 7=clip
127    SetTextRenderMode(i32),
128
129    /// Set text rise (Ts operator).
130    /// Vertical displacement for superscripts/subscripts in text units.
131    SetTextRise(f32),
132
133    // Text positioning operators
134    /// Move text position (Td operator).
135    /// Translates the text matrix by (tx, ty).
136    MoveText(f32, f32),
137
138    /// Move text position and set leading (TD operator).
139    /// Equivalent to: -ty TL tx ty Td
140    MoveTextSetLeading(f32, f32),
141
142    /// Set text matrix directly (Tm operator).
143    /// Parameters: [a, b, c, d, e, f] for transformation matrix.
144    SetTextMatrix(f32, f32, f32, f32, f32, f32),
145
146    /// Move to start of next line (T* operator).
147    /// Uses the current leading value set with TL.
148    NextLine,
149
150    // Text showing operators
151    /// Show text string (Tj operator).
152    /// The bytes are encoded according to the current font's encoding.
153    ShowText(Vec<u8>),
154
155    /// Show text with individual positioning (TJ operator).
156    /// Array elements can be strings or position adjustments.
157    ShowTextArray(Vec<TextElement>),
158
159    /// Move to next line and show text (' operator).
160    /// Equivalent to: T* string Tj
161    NextLineShowText(Vec<u8>),
162
163    /// Set spacing, move to next line, and show text (" operator).
164    /// Equivalent to: word_spacing Tw char_spacing Tc string '
165    SetSpacingNextLineShowText(f32, f32, Vec<u8>),
166
167    // Graphics state operators
168    /// Save current graphics state (q operator).
169    /// Pushes the entire graphics state onto a stack.
170    SaveGraphicsState,
171
172    /// Restore graphics state (Q operator).
173    /// Pops the graphics state from the stack.
174    RestoreGraphicsState,
175
176    /// Concatenate matrix to current transformation matrix (cm operator).
177    /// Modifies the CTM: CTM' = CTM × [a b c d e f]
178    SetTransformMatrix(f32, f32, f32, f32, f32, f32),
179
180    /// Set line width (w operator) in user space units.
181    SetLineWidth(f32),
182
183    /// Set line cap style (J operator).
184    /// 0=butt cap, 1=round cap, 2=projecting square cap
185    SetLineCap(i32),
186
187    /// Set line join style (j operator).
188    /// 0=miter join, 1=round join, 2=bevel join
189    SetLineJoin(i32),
190
191    /// Set miter limit (M operator).
192    /// Maximum ratio of miter length to line width.
193    SetMiterLimit(f32),
194
195    /// Set dash pattern (d operator).
196    /// Array of dash/gap lengths and starting phase.
197    SetDashPattern(Vec<f32>, f32),
198
199    /// Set rendering intent (ri operator).
200    /// Color rendering intent: /AbsoluteColorimetric, /RelativeColorimetric, /Saturation, /Perceptual
201    SetIntent(String),
202
203    /// Set flatness tolerance (i operator).
204    /// Maximum error when rendering curves as line segments.
205    SetFlatness(f32),
206
207    /// Set graphics state from parameter dictionary (gs operator).
208    /// References ExtGState resource dictionary.
209    SetGraphicsStateParams(String),
210
211    // Path construction operators
212    /// Begin new subpath at point (m operator).
213    MoveTo(f32, f32),
214
215    /// Append straight line segment (l operator).
216    LineTo(f32, f32),
217
218    /// Append cubic Bézier curve (c operator).
219    /// Control points: (x1,y1), (x2,y2), endpoint: (x3,y3)
220    CurveTo(f32, f32, f32, f32, f32, f32),
221
222    /// Append cubic Bézier curve with first control point = current point (v operator).
223    CurveToV(f32, f32, f32, f32),
224
225    /// Append cubic Bézier curve with second control point = endpoint (y operator).
226    CurveToY(f32, f32, f32, f32),
227
228    /// Close current subpath (h operator).
229    /// Appends straight line to starting point.
230    ClosePath,
231
232    /// Append rectangle as complete subpath (re operator).
233    /// Parameters: x, y, width, height
234    Rectangle(f32, f32, f32, f32),
235
236    // Path painting operators
237    /// Stroke the path (S operator).
238    Stroke,
239
240    /// Close and stroke the path (s operator).
241    /// Equivalent to: h S
242    CloseStroke,
243
244    /// Fill the path using nonzero winding rule (f or F operator).
245    Fill,
246
247    /// Fill the path using even-odd rule (f* operator).
248    FillEvenOdd,
249
250    /// Fill then stroke the path (B operator).
251    /// Uses nonzero winding rule.
252    FillStroke,
253
254    /// Fill then stroke using even-odd rule (B* operator).
255    FillStrokeEvenOdd,
256
257    /// Close, fill, and stroke the path (b operator).
258    /// Equivalent to: h B
259    CloseFillStroke,
260
261    /// Close, fill, and stroke using even-odd rule (b* operator).
262    CloseFillStrokeEvenOdd,
263
264    /// End path without filling or stroking (n operator).
265    /// Used primarily before clipping.
266    EndPath,
267
268    // Clipping path operators
269    Clip,        // W
270    ClipEvenOdd, // W*
271
272    // Color operators
273    /// Set stroking color space (CS operator).
274    /// References ColorSpace resource dictionary.
275    SetStrokingColorSpace(String),
276
277    /// Set non-stroking color space (cs operator).
278    /// References ColorSpace resource dictionary.
279    SetNonStrokingColorSpace(String),
280
281    /// Set stroking color (SC, SCN operators).
282    /// Number of components depends on current color space.
283    SetStrokingColor(Vec<f32>),
284
285    /// Set non-stroking color (sc, scn operators).
286    /// Number of components depends on current color space.
287    SetNonStrokingColor(Vec<f32>),
288
289    /// Set stroking color to DeviceGray (G operator).
290    /// 0.0 = black, 1.0 = white
291    SetStrokingGray(f32),
292
293    /// Set non-stroking color to DeviceGray (g operator).
294    SetNonStrokingGray(f32),
295
296    /// Set stroking color to DeviceRGB (RG operator).
297    /// Components range from 0.0 to 1.0.
298    SetStrokingRGB(f32, f32, f32),
299
300    /// Set non-stroking color to DeviceRGB (rg operator).
301    SetNonStrokingRGB(f32, f32, f32),
302
303    /// Set stroking color to DeviceCMYK (K operator).
304    SetStrokingCMYK(f32, f32, f32, f32),
305
306    /// Set non-stroking color to DeviceCMYK (k operator).
307    SetNonStrokingCMYK(f32, f32, f32, f32),
308
309    // Shading operators
310    ShadingFill(String), // sh
311
312    // Inline image operators
313    /// Begin inline image (BI operator)
314    BeginInlineImage,
315    /// Inline image with parsed dictionary and data
316    InlineImage {
317        /// Image parameters (width, height, colorspace, etc.)
318        params: HashMap<String, Object>,
319        /// Raw image data
320        data: Vec<u8>,
321    },
322
323    // XObject operators
324    /// Paint external object (Do operator).
325    /// References XObject resource dictionary (images, forms).
326    PaintXObject(String),
327
328    // Marked content operators
329    BeginMarkedContent(String),                                   // BMC
330    BeginMarkedContentWithProps(String, HashMap<String, String>), // BDC
331    EndMarkedContent,                                             // EMC
332    DefineMarkedContentPoint(String),                             // MP
333    DefineMarkedContentPointWithProps(String, HashMap<String, String>), // DP
334
335    // Compatibility operators
336    BeginCompatibility, // BX
337    EndCompatibility,   // EX
338}
339
/// Represents a text element in a TJ array for ShowTextArray operations.
///
/// The TJ operator takes an array of strings and position adjustments,
/// allowing fine control over character and word spacing.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::parser::content::{TextElement, ContentOperation};
///
/// // TJ array: [(Hello) -50 (World)]
/// let tj_array = vec![
///     TextElement::Text(b"Hello".to_vec()),
///     TextElement::Spacing(-50.0), // Negative: shifts "World" right, adding space
///     TextElement::Text(b"World".to_vec()),
/// ];
/// let op = ContentOperation::ShowTextArray(tj_array);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// Text string to show
    Text(Vec<u8>),
    /// Position adjustment in thousandths of a text space unit.
    ///
    /// The value is *subtracted* from the current text position. For
    /// horizontal writing a positive value therefore moves the following
    /// glyph to the left (tightening the spacing), while a negative value
    /// moves it to the right (adding space).
    Spacing(f32),
}
366
367/// Token types in content streams
368#[derive(Debug, Clone, PartialEq)]
369pub(super) enum Token {
370    Number(f32),
371    Integer(i32),
372    String(Vec<u8>),
373    HexString(Vec<u8>),
374    Name(String),
375    Operator(String),
376    ArrayStart,
377    ArrayEnd,
378    DictStart,
379    DictEnd,
380}
381
/// Content stream tokenizer.
///
/// Walks a borrowed byte slice and hands out one token per call to
/// `next_token`.
pub struct ContentTokenizer<'a> {
    /// The raw content stream bytes being scanned.
    input: &'a [u8],
    /// Index into `input` of the next byte to examine.
    position: usize,
}
387
388impl<'a> ContentTokenizer<'a> {
389    /// Create a new tokenizer for the given input
390    pub fn new(input: &'a [u8]) -> Self {
391        Self { input, position: 0 }
392    }
393
394    /// Get the next token from the stream
395    pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
396        self.skip_whitespace();
397
398        if self.position >= self.input.len() {
399            return Ok(None);
400        }
401
402        let ch = self.input[self.position];
403
404        match ch {
405            // Numbers
406            b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
407
408            // Strings
409            b'(' => self.read_literal_string(),
410            b'<' => {
411                if self.peek_next() == Some(b'<') {
412                    self.position += 2;
413                    Ok(Some(Token::DictStart))
414                } else {
415                    self.read_hex_string()
416                }
417            }
418            b'>' => {
419                if self.peek_next() == Some(b'>') {
420                    self.position += 2;
421                    Ok(Some(Token::DictEnd))
422                } else {
423                    Err(ParseError::SyntaxError {
424                        position: self.position,
425                        message: "Unexpected '>'".to_string(),
426                    })
427                }
428            }
429
430            // Arrays
431            b'[' => {
432                self.position += 1;
433                Ok(Some(Token::ArrayStart))
434            }
435            b']' => {
436                self.position += 1;
437                Ok(Some(Token::ArrayEnd))
438            }
439
440            // Names
441            b'/' => self.read_name(),
442
443            // Skip semicolons (corrupted content recovery)
444            b';' => {
445                self.position += 1;
446                self.next_token() // Recursively get next valid token
447            }
448
449            // Operators or other tokens
450            _ => self.read_operator(),
451        }
452    }
453
454    fn skip_whitespace(&mut self) {
455        while self.position < self.input.len() {
456            match self.input[self.position] {
457                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
458                b'%' => self.skip_comment(),
459                _ => break,
460            }
461        }
462    }
463
464    fn skip_comment(&mut self) {
465        while self.position < self.input.len() && self.input[self.position] != b'\n' {
466            self.position += 1;
467        }
468    }
469
470    fn peek_next(&self) -> Option<u8> {
471        if self.position + 1 < self.input.len() {
472            Some(self.input[self.position + 1])
473        } else {
474            None
475        }
476    }
477
478    fn read_number(&mut self) -> ParseResult<Option<Token>> {
479        let start = self.position;
480        let mut has_dot = false;
481
482        // Handle optional sign
483        if self.position < self.input.len()
484            && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
485        {
486            self.position += 1;
487        }
488
489        // Read digits and optional decimal point
490        while self.position < self.input.len() {
491            match self.input[self.position] {
492                b'0'..=b'9' => self.position += 1,
493                b'.' if !has_dot => {
494                    has_dot = true;
495                    self.position += 1;
496                }
497                _ => break,
498            }
499        }
500
501        let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
502            ParseError::SyntaxError {
503                position: start,
504                message: "Invalid number format".to_string(),
505            }
506        })?;
507
508        if has_dot {
509            let value = num_str
510                .parse::<f32>()
511                .map_err(|_| ParseError::SyntaxError {
512                    position: start,
513                    message: "Invalid float number".to_string(),
514                })?;
515            Ok(Some(Token::Number(value)))
516        } else {
517            let value = num_str
518                .parse::<i32>()
519                .map_err(|_| ParseError::SyntaxError {
520                    position: start,
521                    message: "Invalid integer number".to_string(),
522                })?;
523            Ok(Some(Token::Integer(value)))
524        }
525    }
526
527    fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
528        self.position += 1; // Skip opening '('
529        let mut result = Vec::new();
530        let mut paren_depth = 1;
531        let mut escape = false;
532
533        while self.position < self.input.len() && paren_depth > 0 {
534            let ch = self.input[self.position];
535            self.position += 1;
536
537            if escape {
538                match ch {
539                    b'n' => result.push(b'\n'),
540                    b'r' => result.push(b'\r'),
541                    b't' => result.push(b'\t'),
542                    b'b' => result.push(b'\x08'),
543                    b'f' => result.push(b'\x0C'),
544                    b'(' => result.push(b'('),
545                    b')' => result.push(b')'),
546                    b'\\' => result.push(b'\\'),
547                    b'0'..=b'7' => {
548                        // Octal escape sequence
549                        self.position -= 1;
550                        let octal_value = self.read_octal_escape()?;
551                        result.push(octal_value);
552                    }
553                    _ => result.push(ch), // Unknown escape, treat as literal
554                }
555                escape = false;
556            } else {
557                match ch {
558                    b'\\' => escape = true,
559                    b'(' => {
560                        paren_depth += 1;
561                        result.push(ch);
562                    }
563                    b')' => {
564                        paren_depth -= 1;
565                        if paren_depth > 0 {
566                            result.push(ch);
567                        }
568                    }
569                    _ => result.push(ch),
570                }
571            }
572        }
573
574        Ok(Some(Token::String(result)))
575    }
576
577    fn read_octal_escape(&mut self) -> ParseResult<u8> {
578        let mut value = 0u8;
579        let mut count = 0;
580
581        while count < 3 && self.position < self.input.len() {
582            match self.input[self.position] {
583                b'0'..=b'7' => {
584                    value = value * 8 + (self.input[self.position] - b'0');
585                    self.position += 1;
586                    count += 1;
587                }
588                _ => break,
589            }
590        }
591
592        Ok(value)
593    }
594
595    fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
596        self.position += 1; // Skip opening '<'
597        let mut result = Vec::new();
598        let mut nibble = None;
599
600        while self.position < self.input.len() {
601            let ch = self.input[self.position];
602
603            match ch {
604                b'>' => {
605                    self.position += 1;
606                    // Handle odd number of hex digits
607                    if let Some(n) = nibble {
608                        result.push(n << 4);
609                    }
610                    return Ok(Some(Token::HexString(result)));
611                }
612                b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
613                    let digit = if ch <= b'9' {
614                        ch - b'0'
615                    } else if ch <= b'F' {
616                        ch - b'A' + 10
617                    } else {
618                        ch - b'a' + 10
619                    };
620
621                    if let Some(n) = nibble {
622                        result.push((n << 4) | digit);
623                        nibble = None;
624                    } else {
625                        nibble = Some(digit);
626                    }
627                    self.position += 1;
628                }
629                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
630                    // Skip whitespace in hex strings
631                    self.position += 1;
632                }
633                _ => {
634                    return Err(ParseError::SyntaxError {
635                        position: self.position,
636                        message: format!("Invalid character in hex string: {:?}", ch as char),
637                    });
638                }
639            }
640        }
641
642        Err(ParseError::SyntaxError {
643            position: self.position,
644            message: "Unterminated hex string".to_string(),
645        })
646    }
647
648    fn read_name(&mut self) -> ParseResult<Option<Token>> {
649        self.position += 1; // Skip '/'
650        let start = self.position;
651
652        while self.position < self.input.len() {
653            let ch = self.input[self.position];
654            match ch {
655                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
656                | b']' | b'{' | b'}' | b'/' | b'%' => break,
657                b'#' => {
658                    // Handle hex escape in name
659                    self.position += 1;
660                    if self.position + 1 < self.input.len() {
661                        self.position += 2;
662                    }
663                }
664                _ => self.position += 1,
665            }
666        }
667
668        let name_bytes = &self.input[start..self.position];
669        let name = self.decode_name(name_bytes)?;
670        Ok(Some(Token::Name(name)))
671    }
672
673    fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
674        let mut result = Vec::new();
675        let mut i = 0;
676
677        while i < bytes.len() {
678            if bytes[i] == b'#' && i + 2 < bytes.len() {
679                // Hex escape
680                let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
681                    ParseError::SyntaxError {
682                        position: self.position,
683                        message: "Invalid hex escape in name".to_string(),
684                    }
685                })?;
686                let value =
687                    u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
688                        position: self.position,
689                        message: "Invalid hex escape in name".to_string(),
690                    })?;
691                result.push(value);
692                i += 3;
693            } else {
694                result.push(bytes[i]);
695                i += 1;
696            }
697        }
698
699        String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
700            position: self.position,
701            message: "Invalid UTF-8 in name".to_string(),
702        })
703    }
704
705    fn read_operator(&mut self) -> ParseResult<Option<Token>> {
706        let start = self.position;
707
708        while self.position < self.input.len() {
709            let ch = self.input[self.position];
710            match ch {
711                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
712                | b']' | b'{' | b'}' | b'/' | b'%' | b';' => break,
713                _ => self.position += 1,
714            }
715        }
716
717        let op_bytes = &self.input[start..self.position];
718        let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
719            position: start,
720            message: "Invalid operator".to_string(),
721        })?;
722
723        Ok(Some(Token::Operator(op.to_string())))
724    }
725}
726
727/// High-level content stream parser.
728///
729/// Converts tokenized content streams into structured `ContentOperation` values.
730/// This parser handles the operand stack and operator parsing according to PDF specifications.
731///
732/// # Usage
733///
734/// The parser is typically used through its static methods:
735///
736/// ```rust
737/// use oxidize_pdf::parser::content::ContentParser;
738///
739/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
740/// let content = b"q 1 0 0 1 50 50 cm 100 100 200 150 re S Q";
741/// let operations = ContentParser::parse(content)?;
742/// # Ok(())
743/// # }
744/// ```
745pub struct ContentParser {
746    tokens: Vec<Token>,
747    position: usize,
748}
749
750impl ContentParser {
751    /// Create a new content parser
752    pub fn new(_content: &[u8]) -> Self {
753        Self {
754            tokens: Vec::new(),
755            position: 0,
756        }
757    }
758
759    /// Parse a content stream into a vector of operators.
760    ///
761    /// This is a convenience method that creates a parser and processes the entire stream.
762    ///
763    /// # Arguments
764    ///
765    /// * `content` - Raw content stream bytes (may be compressed)
766    ///
767    /// # Returns
768    ///
769    /// A vector of parsed `ContentOperation` values in the order they appear.
770    ///
771    /// # Errors
772    ///
773    /// Returns an error if:
774    /// - Invalid operator syntax is encountered
775    /// - Operators have incorrect number/type of operands
776    /// - Unknown operators are found
777    ///
778    /// # Example
779    ///
780    /// ```rust
781    /// use oxidize_pdf::parser::content::{ContentParser, ContentOperation};
782    ///
783    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
784    /// let content = b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET";
785    /// let operations = ContentParser::parse(content)?;
786    ///
787    /// assert_eq!(operations.len(), 5);
788    /// assert!(matches!(operations[0], ContentOperation::BeginText));
789    /// # Ok(())
790    /// # }
791    /// ```
792    pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
793        Self::parse_content(content)
794    }
795
796    /// Parse a content stream into a vector of operators.
797    ///
798    /// This method tokenizes the input and converts it to operations.
799    /// It handles the PDF postfix notation where operands precede operators.
800    pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
801        let mut tokenizer = ContentTokenizer::new(content);
802        let mut tokens = Vec::new();
803
804        // Tokenize the entire stream
805        while let Some(token) = tokenizer.next_token()? {
806            tokens.push(token);
807        }
808
809        let mut parser = Self {
810            tokens,
811            position: 0,
812        };
813
814        parser.parse_operators()
815    }
816
817    fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
818        let mut operators = Vec::new();
819        let mut operand_stack: Vec<Token> = Vec::new();
820
821        while self.position < self.tokens.len() {
822            let token = self.tokens[self.position].clone();
823            self.position += 1;
824
825            match &token {
826                Token::Operator(op) => {
827                    let operator = self.parse_operator(op, &mut operand_stack)?;
828                    operators.push(operator);
829                }
830                _ => {
831                    // Not an operator, push to operand stack
832                    operand_stack.push(token);
833                }
834            }
835        }
836
837        Ok(operators)
838    }
839
840    fn parse_operator(
841        &mut self,
842        op: &str,
843        operands: &mut Vec<Token>,
844    ) -> ParseResult<ContentOperation> {
845        let operator = match op {
846            // Text object operators
847            "BT" => ContentOperation::BeginText,
848            "ET" => ContentOperation::EndText,
849
850            // Text state operators
851            "Tc" => {
852                let spacing = self.pop_number(operands)?;
853                ContentOperation::SetCharSpacing(spacing)
854            }
855            "Tw" => {
856                let spacing = self.pop_number(operands)?;
857                ContentOperation::SetWordSpacing(spacing)
858            }
859            "Tz" => {
860                let scale = self.pop_number(operands)?;
861                ContentOperation::SetHorizontalScaling(scale)
862            }
863            "TL" => {
864                let leading = self.pop_number(operands)?;
865                ContentOperation::SetLeading(leading)
866            }
867            "Tf" => {
868                let size = self.pop_number(operands)?;
869                let font = self.pop_name(operands)?;
870                ContentOperation::SetFont(font, size)
871            }
872            "Tr" => {
873                let mode = self.pop_integer(operands)?;
874                ContentOperation::SetTextRenderMode(mode)
875            }
876            "Ts" => {
877                let rise = self.pop_number(operands)?;
878                ContentOperation::SetTextRise(rise)
879            }
880
881            // Text positioning operators
882            "Td" => {
883                let ty = self.pop_number(operands)?;
884                let tx = self.pop_number(operands)?;
885                ContentOperation::MoveText(tx, ty)
886            }
887            "TD" => {
888                let ty = self.pop_number(operands)?;
889                let tx = self.pop_number(operands)?;
890                ContentOperation::MoveTextSetLeading(tx, ty)
891            }
892            "Tm" => {
893                let f = self.pop_number(operands)?;
894                let e = self.pop_number(operands)?;
895                let d = self.pop_number(operands)?;
896                let c = self.pop_number(operands)?;
897                let b = self.pop_number(operands)?;
898                let a = self.pop_number(operands)?;
899                ContentOperation::SetTextMatrix(a, b, c, d, e, f)
900            }
901            "T*" => ContentOperation::NextLine,
902
903            // Text showing operators
904            "Tj" => {
905                let text = self.pop_string(operands)?;
906                ContentOperation::ShowText(text)
907            }
908            "TJ" => {
909                let array = self.pop_array(operands)?;
910                let elements = self.parse_text_array(array)?;
911                ContentOperation::ShowTextArray(elements)
912            }
913            "'" => {
914                let text = self.pop_string(operands)?;
915                ContentOperation::NextLineShowText(text)
916            }
917            "\"" => {
918                let text = self.pop_string(operands)?;
919                let aw = self.pop_number(operands)?;
920                let ac = self.pop_number(operands)?;
921                ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
922            }
923
924            // Graphics state operators
925            "q" => ContentOperation::SaveGraphicsState,
926            "Q" => ContentOperation::RestoreGraphicsState,
927            "cm" => {
928                let f = self.pop_number(operands)?;
929                let e = self.pop_number(operands)?;
930                let d = self.pop_number(operands)?;
931                let c = self.pop_number(operands)?;
932                let b = self.pop_number(operands)?;
933                let a = self.pop_number(operands)?;
934                ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
935            }
936            "w" => {
937                let width = self.pop_number(operands)?;
938                ContentOperation::SetLineWidth(width)
939            }
940            "J" => {
941                let cap = self.pop_integer(operands)?;
942                ContentOperation::SetLineCap(cap)
943            }
944            "j" => {
945                let join = self.pop_integer(operands)?;
946                ContentOperation::SetLineJoin(join)
947            }
948            "M" => {
949                let limit = self.pop_number(operands)?;
950                ContentOperation::SetMiterLimit(limit)
951            }
952            "d" => {
953                let phase = self.pop_number(operands)?;
954                let array = self.pop_array(operands)?;
955                let pattern = self.parse_dash_array(array)?;
956                ContentOperation::SetDashPattern(pattern, phase)
957            }
958            "ri" => {
959                let intent = self.pop_name(operands)?;
960                ContentOperation::SetIntent(intent)
961            }
962            "i" => {
963                let flatness = self.pop_number(operands)?;
964                ContentOperation::SetFlatness(flatness)
965            }
966            "gs" => {
967                let name = self.pop_name(operands)?;
968                ContentOperation::SetGraphicsStateParams(name)
969            }
970
971            // Path construction operators
972            "m" => {
973                let y = self.pop_number(operands)?;
974                let x = self.pop_number(operands)?;
975                ContentOperation::MoveTo(x, y)
976            }
977            "l" => {
978                let y = self.pop_number(operands)?;
979                let x = self.pop_number(operands)?;
980                ContentOperation::LineTo(x, y)
981            }
982            "c" => {
983                let y3 = self.pop_number(operands)?;
984                let x3 = self.pop_number(operands)?;
985                let y2 = self.pop_number(operands)?;
986                let x2 = self.pop_number(operands)?;
987                let y1 = self.pop_number(operands)?;
988                let x1 = self.pop_number(operands)?;
989                ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
990            }
991            "v" => {
992                let y3 = self.pop_number(operands)?;
993                let x3 = self.pop_number(operands)?;
994                let y2 = self.pop_number(operands)?;
995                let x2 = self.pop_number(operands)?;
996                ContentOperation::CurveToV(x2, y2, x3, y3)
997            }
998            "y" => {
999                let y3 = self.pop_number(operands)?;
1000                let x3 = self.pop_number(operands)?;
1001                let y1 = self.pop_number(operands)?;
1002                let x1 = self.pop_number(operands)?;
1003                ContentOperation::CurveToY(x1, y1, x3, y3)
1004            }
1005            "h" => ContentOperation::ClosePath,
1006            "re" => {
1007                let height = self.pop_number(operands)?;
1008                let width = self.pop_number(operands)?;
1009                let y = self.pop_number(operands)?;
1010                let x = self.pop_number(operands)?;
1011                ContentOperation::Rectangle(x, y, width, height)
1012            }
1013
1014            // Path painting operators
1015            "S" => ContentOperation::Stroke,
1016            "s" => ContentOperation::CloseStroke,
1017            "f" | "F" => ContentOperation::Fill,
1018            "f*" => ContentOperation::FillEvenOdd,
1019            "B" => ContentOperation::FillStroke,
1020            "B*" => ContentOperation::FillStrokeEvenOdd,
1021            "b" => ContentOperation::CloseFillStroke,
1022            "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1023            "n" => ContentOperation::EndPath,
1024
1025            // Clipping path operators
1026            "W" => ContentOperation::Clip,
1027            "W*" => ContentOperation::ClipEvenOdd,
1028
1029            // Color operators
1030            "CS" => {
1031                let name = self.pop_name(operands)?;
1032                ContentOperation::SetStrokingColorSpace(name)
1033            }
1034            "cs" => {
1035                let name = self.pop_name(operands)?;
1036                ContentOperation::SetNonStrokingColorSpace(name)
1037            }
1038            "SC" | "SCN" => {
1039                let components = self.pop_color_components(operands)?;
1040                ContentOperation::SetStrokingColor(components)
1041            }
1042            "sc" | "scn" => {
1043                let components = self.pop_color_components(operands)?;
1044                ContentOperation::SetNonStrokingColor(components)
1045            }
1046            "G" => {
1047                let gray = self.pop_number(operands)?;
1048                ContentOperation::SetStrokingGray(gray)
1049            }
1050            "g" => {
1051                let gray = self.pop_number(operands)?;
1052                ContentOperation::SetNonStrokingGray(gray)
1053            }
1054            "RG" => {
1055                let b = self.pop_number(operands)?;
1056                let g = self.pop_number(operands)?;
1057                let r = self.pop_number(operands)?;
1058                ContentOperation::SetStrokingRGB(r, g, b)
1059            }
1060            "rg" => {
1061                let b = self.pop_number(operands)?;
1062                let g = self.pop_number(operands)?;
1063                let r = self.pop_number(operands)?;
1064                ContentOperation::SetNonStrokingRGB(r, g, b)
1065            }
1066            "K" => {
1067                let k = self.pop_number(operands)?;
1068                let y = self.pop_number(operands)?;
1069                let m = self.pop_number(operands)?;
1070                let c = self.pop_number(operands)?;
1071                ContentOperation::SetStrokingCMYK(c, m, y, k)
1072            }
1073            "k" => {
1074                let k = self.pop_number(operands)?;
1075                let y = self.pop_number(operands)?;
1076                let m = self.pop_number(operands)?;
1077                let c = self.pop_number(operands)?;
1078                ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1079            }
1080
1081            // Shading operators
1082            "sh" => {
1083                let name = self.pop_name(operands)?;
1084                ContentOperation::ShadingFill(name)
1085            }
1086
1087            // XObject operators
1088            "Do" => {
1089                let name = self.pop_name(operands)?;
1090                ContentOperation::PaintXObject(name)
1091            }
1092
1093            // Marked content operators
1094            "BMC" => {
1095                let tag = self.pop_name(operands)?;
1096                ContentOperation::BeginMarkedContent(tag)
1097            }
1098            "BDC" => {
1099                let props = self.pop_dict_or_name(operands)?;
1100                let tag = self.pop_name(operands)?;
1101                ContentOperation::BeginMarkedContentWithProps(tag, props)
1102            }
1103            "EMC" => ContentOperation::EndMarkedContent,
1104            "MP" => {
1105                let tag = self.pop_name(operands)?;
1106                ContentOperation::DefineMarkedContentPoint(tag)
1107            }
1108            "DP" => {
1109                let props = self.pop_dict_or_name(operands)?;
1110                let tag = self.pop_name(operands)?;
1111                ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1112            }
1113
1114            // Compatibility operators
1115            "BX" => ContentOperation::BeginCompatibility,
1116            "EX" => ContentOperation::EndCompatibility,
1117
1118            // Inline images are handled specially
1119            "BI" => {
1120                operands.clear(); // Clear any remaining operands
1121                self.parse_inline_image()?
1122            }
1123
1124            _ => {
1125                return Err(ParseError::SyntaxError {
1126                    position: self.position,
1127                    message: format!("Unknown operator: {op}"),
1128                });
1129            }
1130        };
1131
1132        operands.clear(); // Clear operands after processing
1133        Ok(operator)
1134    }
1135
1136    // Helper methods for popping operands
1137    fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1138        match operands.pop() {
1139            Some(Token::Number(n)) => Ok(n),
1140            Some(Token::Integer(i)) => Ok(i as f32),
1141            _ => Err(ParseError::SyntaxError {
1142                position: self.position,
1143                message: "Expected number operand".to_string(),
1144            }),
1145        }
1146    }
1147
1148    fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1149        match operands.pop() {
1150            Some(Token::Integer(i)) => Ok(i),
1151            _ => Err(ParseError::SyntaxError {
1152                position: self.position,
1153                message: "Expected integer operand".to_string(),
1154            }),
1155        }
1156    }
1157
1158    fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1159        match operands.pop() {
1160            Some(Token::Name(n)) => Ok(n),
1161            _ => Err(ParseError::SyntaxError {
1162                position: self.position,
1163                message: "Expected name operand".to_string(),
1164            }),
1165        }
1166    }
1167
1168    fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1169        match operands.pop() {
1170            Some(Token::String(s)) => Ok(s),
1171            Some(Token::HexString(s)) => Ok(s),
1172            _ => Err(ParseError::SyntaxError {
1173                position: self.position,
1174                message: "Expected string operand".to_string(),
1175            }),
1176        }
1177    }
1178
1179    fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1180        // First check if we have an ArrayEnd at the top (which we should for a complete array)
1181        let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
1182        if has_array_end {
1183            operands.pop(); // Remove the ArrayEnd
1184        }
1185
1186        let mut array = Vec::new();
1187        let mut found_start = false;
1188
1189        // Pop tokens until we find ArrayStart
1190        while let Some(token) = operands.pop() {
1191            match token {
1192                Token::ArrayStart => {
1193                    found_start = true;
1194                    break;
1195                }
1196                Token::ArrayEnd => {
1197                    // Skip any additional ArrayEnd tokens (shouldn't happen in well-formed PDFs)
1198                    continue;
1199                }
1200                _ => array.push(token),
1201            }
1202        }
1203
1204        if !found_start {
1205            return Err(ParseError::SyntaxError {
1206                position: self.position,
1207                message: "Expected array".to_string(),
1208            });
1209        }
1210
1211        array.reverse(); // We collected in reverse order
1212        Ok(array)
1213    }
1214
1215    fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1216        if let Some(token) = operands.pop() {
1217            match token {
1218                Token::Name(name) => {
1219                    // Name token - this is a reference to properties in the resource dictionary
1220                    // For now, we'll store it as a special entry to indicate it's a resource reference
1221                    let mut props = HashMap::new();
1222                    props.insert("__resource_ref".to_string(), name);
1223                    Ok(props)
1224                }
1225                Token::DictEnd => {
1226                    // Inline dictionary - tokens are on stack in reverse order:
1227                    // Stack: [..., DictStart, Name("key"), Value, DictEnd] <- top
1228                    // After popping DictEnd, we need to pop value-key pairs until DictStart
1229                    let mut props = HashMap::new();
1230
1231                    // Collect key-value pairs (values come before keys on stack)
1232                    while let Some(value_token) = operands.pop() {
1233                        if matches!(value_token, Token::DictStart) {
1234                            break;
1235                        }
1236
1237                        // In PDF dict syntax: /Key Value
1238                        // On stack after tokenization: [DictStart, Name(Key), Value, ...]
1239                        // Popping gives us: Value first, then Key
1240                        let value = match &value_token {
1241                            Token::Name(name) => name.clone(),
1242                            Token::String(s) => String::from_utf8_lossy(s).to_string(),
1243                            Token::Integer(i) => i.to_string(),
1244                            Token::Number(f) => f.to_string(),
1245                            Token::ArrayEnd => {
1246                                // Array value - collect elements until ArrayStart
1247                                let mut array_elements = Vec::new();
1248                                while let Some(arr_token) = operands.pop() {
1249                                    match arr_token {
1250                                        Token::ArrayStart => break,
1251                                        Token::Name(n) => array_elements.push(n),
1252                                        Token::String(s) => array_elements
1253                                            .push(String::from_utf8_lossy(&s).to_string()),
1254                                        Token::Integer(i) => array_elements.push(i.to_string()),
1255                                        Token::Number(f) => array_elements.push(f.to_string()),
1256                                        _ => {} // Skip other token types in array
1257                                    }
1258                                }
1259                                array_elements.reverse();
1260                                format!("[{}]", array_elements.join(", "))
1261                            }
1262                            _ => continue, // Skip unsupported value types
1263                        };
1264
1265                        // Now pop the key (should be a Name)
1266                        if let Some(Token::Name(key)) = operands.pop() {
1267                            props.insert(key, value);
1268                        }
1269                    }
1270
1271                    Ok(props)
1272                }
1273                _ => {
1274                    // Unexpected token type, treat as empty properties
1275                    Ok(HashMap::new())
1276                }
1277            }
1278        } else {
1279            // No operand available
1280            Err(ParseError::SyntaxError {
1281                position: 0,
1282                message: "Expected dictionary or name for marked content properties".to_string(),
1283            })
1284        }
1285    }
1286
1287    fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1288        let mut components = Vec::new();
1289
1290        // Pop all numeric values from the stack
1291        while let Some(token) = operands.last() {
1292            match token {
1293                Token::Number(n) => {
1294                    components.push(*n);
1295                    operands.pop();
1296                }
1297                Token::Integer(i) => {
1298                    components.push(*i as f32);
1299                    operands.pop();
1300                }
1301                _ => break,
1302            }
1303        }
1304
1305        components.reverse();
1306        Ok(components)
1307    }
1308
1309    fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1310        let mut elements = Vec::new();
1311
1312        for token in tokens {
1313            match token {
1314                Token::String(s) | Token::HexString(s) => {
1315                    elements.push(TextElement::Text(s));
1316                }
1317                Token::Number(n) => {
1318                    elements.push(TextElement::Spacing(n));
1319                }
1320                Token::Integer(i) => {
1321                    elements.push(TextElement::Spacing(i as f32));
1322                }
1323                _ => {
1324                    return Err(ParseError::SyntaxError {
1325                        position: self.position,
1326                        message: "Invalid element in text array".to_string(),
1327                    });
1328                }
1329            }
1330        }
1331
1332        Ok(elements)
1333    }
1334
1335    fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1336        let mut pattern = Vec::new();
1337
1338        for token in tokens {
1339            match token {
1340                Token::Number(n) => pattern.push(n),
1341                Token::Integer(i) => pattern.push(i as f32),
1342                _ => {
1343                    return Err(ParseError::SyntaxError {
1344                        position: self.position,
1345                        message: "Invalid element in dash array".to_string(),
1346                    });
1347                }
1348            }
1349        }
1350
1351        Ok(pattern)
1352    }
1353
1354    fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1355        // Parse inline image dictionary until we find ID
1356        let mut params = HashMap::new();
1357
1358        while self.position < self.tokens.len() {
1359            // Check if we've reached the ID operator
1360            if let Token::Operator(op) = &self.tokens[self.position] {
1361                if op == "ID" {
1362                    self.position += 1;
1363                    break;
1364                }
1365            }
1366
1367            // Parse key-value pairs for image parameters
1368            // Keys are abbreviated in inline images:
1369            // /W -> Width, /H -> Height, /CS -> ColorSpace, /BPC -> BitsPerComponent
1370            // /F -> Filter, /DP -> DecodeParms, /IM -> ImageMask, /I -> Interpolate
1371            if let Token::Name(key) = &self.tokens[self.position] {
1372                self.position += 1;
1373                if self.position >= self.tokens.len() {
1374                    break;
1375                }
1376
1377                // Parse the value
1378                let value = match &self.tokens[self.position] {
1379                    Token::Integer(n) => Object::Integer(*n as i64),
1380                    Token::Number(n) => Object::Real(*n as f64),
1381                    Token::Name(s) => Object::Name(expand_inline_name(s)),
1382                    Token::String(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1383                    Token::HexString(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1384                    _ => Object::Null,
1385                };
1386
1387                // Expand abbreviated keys to full names
1388                let full_key = expand_inline_key(key);
1389                params.insert(full_key, value);
1390                self.position += 1;
1391            } else {
1392                self.position += 1;
1393            }
1394        }
1395
1396        // Now we should be at the image data
1397        // Collect bytes until we find EI
1398        let mut data = Vec::new();
1399
1400        // For inline images, we need to read raw bytes until EI
1401        // This is tricky because EI could appear in the image data
1402        // We need to look for EI followed by a whitespace or operator
1403
1404        // Simplified approach: collect all tokens until we find EI operator
1405        while self.position < self.tokens.len() {
1406            if let Token::Operator(op) = &self.tokens[self.position] {
1407                if op == "EI" {
1408                    self.position += 1;
1409                    break;
1410                }
1411            }
1412
1413            // Convert token to bytes (simplified - real implementation would need raw byte access)
1414            match &self.tokens[self.position] {
1415                Token::String(bytes) => data.extend_from_slice(bytes),
1416                Token::HexString(bytes) => data.extend_from_slice(bytes),
1417                Token::Integer(n) => data.extend_from_slice(n.to_string().as_bytes()),
1418                Token::Number(n) => data.extend_from_slice(n.to_string().as_bytes()),
1419                Token::Name(s) => data.extend_from_slice(s.as_bytes()),
1420                Token::Operator(s) if s != "EI" => data.extend_from_slice(s.as_bytes()),
1421                _ => {}
1422            }
1423            self.position += 1;
1424        }
1425
1426        Ok(ContentOperation::InlineImage { params, data })
1427    }
1428}
1429
/// Expands an abbreviated inline-image dictionary key to its full name.
///
/// Keys that are not a known abbreviation (including keys already written in
/// full, such as `ColorSpace`) are returned unchanged.
fn expand_inline_key(key: &str) -> String {
    let full_name = match key {
        "W" => "Width",
        "H" => "Height",
        "CS" => "ColorSpace",
        "BPC" => "BitsPerComponent",
        "F" => "Filter",
        "DP" => "DecodeParms",
        "IM" => "ImageMask",
        "I" => "Interpolate",
        "D" => "Decode",
        unabbreviated => unabbreviated,
    };
    full_name.to_owned()
}
1446
/// Expands an abbreviated inline-image name value (color space or filter).
///
/// Names that are not a known abbreviation are returned unchanged.
fn expand_inline_name(name: &str) -> String {
    const ABBREVIATIONS: [(&str, &str); 11] = [
        ("G", "DeviceGray"),
        ("RGB", "DeviceRGB"),
        ("CMYK", "DeviceCMYK"),
        ("I", "Indexed"),
        ("AHx", "ASCIIHexDecode"),
        ("A85", "ASCII85Decode"),
        ("LZW", "LZWDecode"),
        ("Fl", "FlateDecode"),
        ("RL", "RunLengthDecode"),
        ("DCT", "DCTDecode"),
        ("CCF", "CCITTFaxDecode"),
    ];

    ABBREVIATIONS
        .iter()
        .find(|(short, _)| *short == name)
        .map_or(name, |(_, full)| *full)
        .to_string()
}
1464
1465#[cfg(test)]
1466mod tests {
1467    use super::*;
1468
1469    #[test]
1470    fn test_tokenize_numbers() {
1471        let input = b"123 -45 3.14159 -0.5 .5";
1472        let mut tokenizer = ContentTokenizer::new(input);
1473
1474        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(123)));
1475        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(-45)));
1476        assert_eq!(
1477            tokenizer.next_token().unwrap(),
1478            Some(Token::Number(3.14159))
1479        );
1480        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1481        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1482        assert_eq!(tokenizer.next_token().unwrap(), None);
1483    }
1484
1485    #[test]
1486    fn test_tokenize_strings() {
1487        let input = b"(Hello World) (Hello\\nWorld) (Nested (paren))";
1488        let mut tokenizer = ContentTokenizer::new(input);
1489
1490        assert_eq!(
1491            tokenizer.next_token().unwrap(),
1492            Some(Token::String(b"Hello World".to_vec()))
1493        );
1494        assert_eq!(
1495            tokenizer.next_token().unwrap(),
1496            Some(Token::String(b"Hello\nWorld".to_vec()))
1497        );
1498        assert_eq!(
1499            tokenizer.next_token().unwrap(),
1500            Some(Token::String(b"Nested (paren)".to_vec()))
1501        );
1502    }
1503
1504    #[test]
1505    fn test_tokenize_hex_strings() {
1506        let input = b"<48656C6C6F> <48 65 6C 6C 6F>";
1507        let mut tokenizer = ContentTokenizer::new(input);
1508
1509        assert_eq!(
1510            tokenizer.next_token().unwrap(),
1511            Some(Token::HexString(b"Hello".to_vec()))
1512        );
1513        assert_eq!(
1514            tokenizer.next_token().unwrap(),
1515            Some(Token::HexString(b"Hello".to_vec()))
1516        );
1517    }
1518
1519    #[test]
1520    fn test_tokenize_names() {
1521        let input = b"/Name /Name#20with#20spaces /A#42C";
1522        let mut tokenizer = ContentTokenizer::new(input);
1523
1524        assert_eq!(
1525            tokenizer.next_token().unwrap(),
1526            Some(Token::Name("Name".to_string()))
1527        );
1528        assert_eq!(
1529            tokenizer.next_token().unwrap(),
1530            Some(Token::Name("Name with spaces".to_string()))
1531        );
1532        assert_eq!(
1533            tokenizer.next_token().unwrap(),
1534            Some(Token::Name("ABC".to_string()))
1535        );
1536    }
1537
1538    #[test]
1539    fn test_tokenize_operators() {
1540        let input = b"BT Tj ET q Q";
1541        let mut tokenizer = ContentTokenizer::new(input);
1542
1543        assert_eq!(
1544            tokenizer.next_token().unwrap(),
1545            Some(Token::Operator("BT".to_string()))
1546        );
1547        assert_eq!(
1548            tokenizer.next_token().unwrap(),
1549            Some(Token::Operator("Tj".to_string()))
1550        );
1551        assert_eq!(
1552            tokenizer.next_token().unwrap(),
1553            Some(Token::Operator("ET".to_string()))
1554        );
1555        assert_eq!(
1556            tokenizer.next_token().unwrap(),
1557            Some(Token::Operator("q".to_string()))
1558        );
1559        assert_eq!(
1560            tokenizer.next_token().unwrap(),
1561            Some(Token::Operator("Q".to_string()))
1562        );
1563    }
1564
1565    #[test]
1566    fn test_parse_text_operators() {
1567        let content = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
1568        let operators = ContentParser::parse(content).unwrap();
1569
1570        assert_eq!(operators.len(), 5);
1571        assert_eq!(operators[0], ContentOperation::BeginText);
1572        assert_eq!(
1573            operators[1],
1574            ContentOperation::SetFont("F1".to_string(), 12.0)
1575        );
1576        assert_eq!(operators[2], ContentOperation::MoveText(100.0, 200.0));
1577        assert_eq!(
1578            operators[3],
1579            ContentOperation::ShowText(b"Hello World".to_vec())
1580        );
1581        assert_eq!(operators[4], ContentOperation::EndText);
1582    }
1583
1584    #[test]
1585    fn test_parse_graphics_operators() {
1586        let content = b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q";
1587        let operators = ContentParser::parse(content).unwrap();
1588
1589        assert_eq!(operators.len(), 6);
1590        assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1591        assert_eq!(
1592            operators[1],
1593            ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1594        );
1595        assert_eq!(operators[2], ContentOperation::SetLineWidth(2.0));
1596        assert_eq!(
1597            operators[3],
1598            ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0)
1599        );
1600        assert_eq!(operators[4], ContentOperation::Stroke);
1601        assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
1602    }
1603
1604    #[test]
1605    fn test_parse_color_operators() {
1606        let content = b"0.5 g 1 0 0 rg 0 0 0 1 k";
1607        let operators = ContentParser::parse(content).unwrap();
1608
1609        assert_eq!(operators.len(), 3);
1610        assert_eq!(operators[0], ContentOperation::SetNonStrokingGray(0.5));
1611        assert_eq!(
1612            operators[1],
1613            ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0)
1614        );
1615        assert_eq!(
1616            operators[2],
1617            ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1618        );
1619    }
1620
1621    // Comprehensive tests for all ContentOperation variants
1622    mod comprehensive_tests {
1623        use super::*;
1624
1625        #[test]
1626        fn test_all_text_operators() {
1627            // Test basic text operators that work with current parser
1628            let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
1629            let operators = ContentParser::parse(content).unwrap();
1630
1631            assert_eq!(operators[0], ContentOperation::BeginText);
1632            assert_eq!(operators[1], ContentOperation::SetCharSpacing(5.0));
1633            assert_eq!(operators[2], ContentOperation::SetWordSpacing(10.0));
1634            assert_eq!(operators[3], ContentOperation::SetHorizontalScaling(120.0));
1635            assert_eq!(operators[4], ContentOperation::SetLeading(15.0));
1636            assert_eq!(
1637                operators[5],
1638                ContentOperation::SetFont("F1".to_string(), 12.0)
1639            );
1640            assert_eq!(operators[6], ContentOperation::SetTextRenderMode(1));
1641            assert_eq!(operators[7], ContentOperation::SetTextRise(5.0));
1642            assert_eq!(operators[8], ContentOperation::MoveText(100.0, 200.0));
1643            assert_eq!(
1644                operators[9],
1645                ContentOperation::MoveTextSetLeading(50.0, 150.0)
1646            );
1647            assert_eq!(operators[10], ContentOperation::NextLine);
1648            assert_eq!(operators[11], ContentOperation::ShowText(b"Hello".to_vec()));
1649            assert_eq!(operators[12], ContentOperation::EndText);
1650        }
1651
1652        #[test]
1653        fn test_all_graphics_state_operators() {
1654            // Test basic graphics state operators without arrays
1655            let content = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
1656            let operators = ContentParser::parse(content).unwrap();
1657
1658            assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1659            assert_eq!(operators[1], ContentOperation::RestoreGraphicsState);
1660            assert_eq!(
1661                operators[2],
1662                ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1663            );
1664            assert_eq!(operators[3], ContentOperation::SetLineWidth(2.0));
1665            assert_eq!(operators[4], ContentOperation::SetLineCap(1));
1666            assert_eq!(operators[5], ContentOperation::SetLineJoin(2));
1667            assert_eq!(operators[6], ContentOperation::SetMiterLimit(10.0));
1668            assert_eq!(
1669                operators[7],
1670                ContentOperation::SetGraphicsStateParams("GS1".to_string())
1671            );
1672            assert_eq!(operators[8], ContentOperation::SetFlatness(0.5));
1673            assert_eq!(
1674                operators[9],
1675                ContentOperation::SetIntent("Perceptual".to_string())
1676            );
1677        }
1678
1679        #[test]
1680        fn test_all_path_construction_operators() {
1681            let content = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
1682            let operators = ContentParser::parse(content).unwrap();
1683
1684            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
1685            assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
1686            assert_eq!(
1687                operators[2],
1688                ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0)
1689            );
1690            assert_eq!(
1691                operators[3],
1692                ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0)
1693            );
1694            assert_eq!(
1695                operators[4],
1696                ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0)
1697            );
1698            assert_eq!(operators[5], ContentOperation::ClosePath);
1699            assert_eq!(
1700                operators[6],
1701                ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0)
1702            );
1703        }
1704
1705        #[test]
1706        fn test_all_path_painting_operators() {
1707            let content = b"S s f F f* B B* b b* n W W*";
1708            let operators = ContentParser::parse(content).unwrap();
1709
1710            assert_eq!(operators[0], ContentOperation::Stroke);
1711            assert_eq!(operators[1], ContentOperation::CloseStroke);
1712            assert_eq!(operators[2], ContentOperation::Fill);
1713            assert_eq!(operators[3], ContentOperation::Fill); // F is alias for f
1714            assert_eq!(operators[4], ContentOperation::FillEvenOdd);
1715            assert_eq!(operators[5], ContentOperation::FillStroke);
1716            assert_eq!(operators[6], ContentOperation::FillStrokeEvenOdd);
1717            assert_eq!(operators[7], ContentOperation::CloseFillStroke);
1718            assert_eq!(operators[8], ContentOperation::CloseFillStrokeEvenOdd);
1719            assert_eq!(operators[9], ContentOperation::EndPath);
1720            assert_eq!(operators[10], ContentOperation::Clip);
1721            assert_eq!(operators[11], ContentOperation::ClipEvenOdd);
1722        }
1723
1724        #[test]
1725        fn test_all_color_operators() {
1726            // Test basic color operators that work with current parser
1727            let content = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
1728            let operators = ContentParser::parse(content).unwrap();
1729
1730            assert_eq!(
1731                operators[0],
1732                ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string())
1733            );
1734            assert_eq!(
1735                operators[1],
1736                ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string())
1737            );
1738            assert_eq!(operators[2], ContentOperation::SetStrokingGray(0.7));
1739            assert_eq!(operators[3], ContentOperation::SetNonStrokingGray(0.4));
1740            assert_eq!(
1741                operators[4],
1742                ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0)
1743            );
1744            assert_eq!(
1745                operators[5],
1746                ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0)
1747            );
1748            assert_eq!(
1749                operators[6],
1750                ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1751            );
1752            assert_eq!(
1753                operators[7],
1754                ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5)
1755            );
1756            assert_eq!(
1757                operators[8],
1758                ContentOperation::ShadingFill("Shade1".to_string())
1759            );
1760        }
1761
1762        #[test]
1763        fn test_xobject_and_marked_content_operators() {
1764            // Test basic XObject and marked content operators
1765            let content = b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX";
1766            let operators = ContentParser::parse(content).unwrap();
1767
1768            assert_eq!(
1769                operators[0],
1770                ContentOperation::PaintXObject("Image1".to_string())
1771            );
1772            assert_eq!(
1773                operators[1],
1774                ContentOperation::BeginMarkedContent("MC1".to_string())
1775            );
1776            assert_eq!(operators[2], ContentOperation::EndMarkedContent);
1777            assert_eq!(
1778                operators[3],
1779                ContentOperation::DefineMarkedContentPoint("MP1".to_string())
1780            );
1781            assert_eq!(operators[4], ContentOperation::BeginCompatibility);
1782            assert_eq!(operators[5], ContentOperation::EndCompatibility);
1783        }
1784
1785        #[test]
1786        fn test_complex_content_stream() {
1787            let content = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
1788            let operators = ContentParser::parse(content).unwrap();
1789
1790            assert_eq!(operators.len(), 8);
1791            assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1792            assert_eq!(
1793                operators[1],
1794                ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0)
1795            );
1796            assert_eq!(operators[2], ContentOperation::BeginText);
1797            assert_eq!(
1798                operators[3],
1799                ContentOperation::SetFont("F1".to_string(), 12.0)
1800            );
1801            assert_eq!(operators[4], ContentOperation::MoveText(0.0, 0.0));
1802            assert_eq!(
1803                operators[5],
1804                ContentOperation::ShowText(b"Complex".to_vec())
1805            );
1806            assert_eq!(operators[6], ContentOperation::EndText);
1807            assert_eq!(operators[7], ContentOperation::RestoreGraphicsState);
1808        }
1809
1810        #[test]
1811        fn test_tokenizer_whitespace_handling() {
1812            let input = b"  \t\n\r  BT  \t\n  /F1   12.5  \t Tf  \n\r  ET  ";
1813            let mut tokenizer = ContentTokenizer::new(input);
1814
1815            assert_eq!(
1816                tokenizer.next_token().unwrap(),
1817                Some(Token::Operator("BT".to_string()))
1818            );
1819            assert_eq!(
1820                tokenizer.next_token().unwrap(),
1821                Some(Token::Name("F1".to_string()))
1822            );
1823            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(12.5)));
1824            assert_eq!(
1825                tokenizer.next_token().unwrap(),
1826                Some(Token::Operator("Tf".to_string()))
1827            );
1828            assert_eq!(
1829                tokenizer.next_token().unwrap(),
1830                Some(Token::Operator("ET".to_string()))
1831            );
1832            assert_eq!(tokenizer.next_token().unwrap(), None);
1833        }
1834
1835        #[test]
1836        fn test_tokenizer_edge_cases() {
1837            // Test basic number formats that are actually supported
1838            let input = b"0 .5 -.5 +.5 123. .123 1.23 -1.23";
1839            let mut tokenizer = ContentTokenizer::new(input);
1840
1841            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(0)));
1842            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1843            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1844            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1845            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(123.0)));
1846            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.123)));
1847            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(1.23)));
1848            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-1.23)));
1849        }
1850
1851        #[test]
1852        fn test_string_parsing_edge_cases() {
1853            let input = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
1854            let mut tokenizer = ContentTokenizer::new(input);
1855
1856            assert_eq!(
1857                tokenizer.next_token().unwrap(),
1858                Some(Token::String(b"Simple".to_vec()))
1859            );
1860            assert_eq!(
1861                tokenizer.next_token().unwrap(),
1862                Some(Token::String(b"With\\backslash".to_vec()))
1863            );
1864            assert_eq!(
1865                tokenizer.next_token().unwrap(),
1866                Some(Token::String(b"With)paren".to_vec()))
1867            );
1868            assert_eq!(
1869                tokenizer.next_token().unwrap(),
1870                Some(Token::String(b"With\newline".to_vec()))
1871            );
1872            assert_eq!(
1873                tokenizer.next_token().unwrap(),
1874                Some(Token::String(b"With\ttab".to_vec()))
1875            );
1876            assert_eq!(
1877                tokenizer.next_token().unwrap(),
1878                Some(Token::String(b"With\rcarriage".to_vec()))
1879            );
1880            assert_eq!(
1881                tokenizer.next_token().unwrap(),
1882                Some(Token::String(b"With\x08backspace".to_vec()))
1883            );
1884            assert_eq!(
1885                tokenizer.next_token().unwrap(),
1886                Some(Token::String(b"With\x0Cformfeed".to_vec()))
1887            );
1888            assert_eq!(
1889                tokenizer.next_token().unwrap(),
1890                Some(Token::String(b"With(leftparen".to_vec()))
1891            );
1892            assert_eq!(
1893                tokenizer.next_token().unwrap(),
1894                Some(Token::String(b"With)rightparen".to_vec()))
1895            );
1896        }
1897
1898        #[test]
1899        fn test_hex_string_parsing() {
1900            let input = b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>";
1901            let mut tokenizer = ContentTokenizer::new(input);
1902
1903            assert_eq!(
1904                tokenizer.next_token().unwrap(),
1905                Some(Token::HexString(b"Hello".to_vec()))
1906            );
1907            assert_eq!(
1908                tokenizer.next_token().unwrap(),
1909                Some(Token::HexString(b"Hello".to_vec()))
1910            );
1911            assert_eq!(
1912                tokenizer.next_token().unwrap(),
1913                Some(Token::HexString(b"HelloW".to_vec()))
1914            );
1915            assert_eq!(
1916                tokenizer.next_token().unwrap(),
1917                Some(Token::HexString(b"Hello\x50".to_vec()))
1918            );
1919        }
1920
1921        #[test]
1922        fn test_name_parsing_edge_cases() {
1923            let input = b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
1924            let mut tokenizer = ContentTokenizer::new(input);
1925
1926            assert_eq!(
1927                tokenizer.next_token().unwrap(),
1928                Some(Token::Name("Name".to_string()))
1929            );
1930            assert_eq!(
1931                tokenizer.next_token().unwrap(),
1932                Some(Token::Name("Name with spaces".to_string()))
1933            );
1934            assert_eq!(
1935                tokenizer.next_token().unwrap(),
1936                Some(Token::Name("Name#with#hash".to_string()))
1937            );
1938            assert_eq!(
1939                tokenizer.next_token().unwrap(),
1940                Some(Token::Name("Name/with/slash".to_string()))
1941            );
1942            assert_eq!(
1943                tokenizer.next_token().unwrap(),
1944                Some(Token::Name("EmptyName".to_string()))
1945            );
1946        }
1947
1948        #[test]
1949        fn test_operator_parsing_edge_cases() {
1950            let content = b"q q q Q Q Q BT BT ET ET";
1951            let operators = ContentParser::parse(content).unwrap();
1952
1953            assert_eq!(operators.len(), 10);
1954            assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1955            assert_eq!(operators[1], ContentOperation::SaveGraphicsState);
1956            assert_eq!(operators[2], ContentOperation::SaveGraphicsState);
1957            assert_eq!(operators[3], ContentOperation::RestoreGraphicsState);
1958            assert_eq!(operators[4], ContentOperation::RestoreGraphicsState);
1959            assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
1960            assert_eq!(operators[6], ContentOperation::BeginText);
1961            assert_eq!(operators[7], ContentOperation::BeginText);
1962            assert_eq!(operators[8], ContentOperation::EndText);
1963            assert_eq!(operators[9], ContentOperation::EndText);
1964        }
1965
1966        #[test]
1967        fn test_error_handling_insufficient_operands() {
1968            let content = b"100 Td"; // Missing y coordinate
1969            let result = ContentParser::parse(content);
1970            assert!(result.is_err());
1971        }
1972
1973        #[test]
1974        fn test_error_handling_invalid_operator() {
1975            let content = b"100 200 INVALID";
1976            let result = ContentParser::parse(content);
1977            assert!(result.is_err());
1978        }
1979
1980        #[test]
1981        fn test_error_handling_malformed_string() {
1982            // Test that the tokenizer handles malformed strings appropriately
1983            let input = b"(Unclosed string";
1984            let mut tokenizer = ContentTokenizer::new(input);
1985            let result = tokenizer.next_token();
1986            // The current implementation may not detect this as an error
1987            // so we'll just test that we get some result
1988            assert!(result.is_ok() || result.is_err());
1989        }
1990
1991        #[test]
1992        fn test_error_handling_malformed_hex_string() {
1993            let input = b"<48656C6C6G>";
1994            let mut tokenizer = ContentTokenizer::new(input);
1995            let result = tokenizer.next_token();
1996            assert!(result.is_err());
1997        }
1998
1999        #[test]
2000        fn test_error_handling_malformed_name() {
2001            let input = b"/Name#GG";
2002            let mut tokenizer = ContentTokenizer::new(input);
2003            let result = tokenizer.next_token();
2004            assert!(result.is_err());
2005        }
2006
2007        #[test]
2008        fn test_empty_content_stream() {
2009            let content = b"";
2010            let operators = ContentParser::parse(content).unwrap();
2011            assert_eq!(operators.len(), 0);
2012        }
2013
2014        #[test]
2015        fn test_whitespace_only_content_stream() {
2016            let content = b"   \t\n\r   ";
2017            let operators = ContentParser::parse(content).unwrap();
2018            assert_eq!(operators.len(), 0);
2019        }
2020
2021        #[test]
2022        fn test_mixed_integer_and_real_operands() {
2023            // Test with simple operands that work with current parser
2024            let content = b"100 200 m 150 200 l";
2025            let operators = ContentParser::parse(content).unwrap();
2026
2027            assert_eq!(operators.len(), 2);
2028            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2029            assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
2030        }
2031
2032        #[test]
2033        fn test_negative_operands() {
2034            let content = b"-100 -200 Td -50.5 -75.2 TD";
2035            let operators = ContentParser::parse(content).unwrap();
2036
2037            assert_eq!(operators.len(), 2);
2038            assert_eq!(operators[0], ContentOperation::MoveText(-100.0, -200.0));
2039            assert_eq!(
2040                operators[1],
2041                ContentOperation::MoveTextSetLeading(-50.5, -75.2)
2042            );
2043        }
2044
2045        #[test]
2046        fn test_large_numbers() {
2047            let content = b"999999.999999 -999999.999999 m";
2048            let operators = ContentParser::parse(content).unwrap();
2049
2050            assert_eq!(operators.len(), 1);
2051            assert_eq!(
2052                operators[0],
2053                ContentOperation::MoveTo(999999.999999, -999999.999999)
2054            );
2055        }
2056
2057        #[test]
2058        fn test_scientific_notation() {
2059            // Test with simple decimal numbers since scientific notation isn't implemented
2060            let content = b"123.45 -456.78 m";
2061            let operators = ContentParser::parse(content).unwrap();
2062
2063            assert_eq!(operators.len(), 1);
2064            assert_eq!(operators[0], ContentOperation::MoveTo(123.45, -456.78));
2065        }
2066
2067        #[test]
2068        fn test_show_text_array_complex() {
2069            // Test simple text array without complex syntax
2070            let content = b"(Hello) TJ";
2071            let result = ContentParser::parse(content);
2072            // This should fail since TJ expects array, but test the error handling
2073            assert!(result.is_err());
2074        }
2075
2076        #[test]
2077        fn test_dash_pattern_empty() {
2078            // Test simple dash pattern without array syntax
2079            let content = b"0 d";
2080            let result = ContentParser::parse(content);
2081            // This should fail since dash pattern needs array, but test the error handling
2082            assert!(result.is_err());
2083        }
2084
2085        #[test]
2086        fn test_dash_pattern_complex() {
2087            // Test simple dash pattern without complex array syntax
2088            let content = b"2.5 d";
2089            let result = ContentParser::parse(content);
2090            // This should fail since dash pattern needs array, but test the error handling
2091            assert!(result.is_err());
2092        }
2093
2094        #[test]
2095        fn test_pop_array_removes_array_end() {
2096            // Test that pop_array correctly handles ArrayEnd tokens
2097            let parser = ContentParser::new(b"");
2098
2099            // Test normal array: [1 2 3]
2100            let mut operands = vec![
2101                Token::ArrayStart,
2102                Token::Integer(1),
2103                Token::Integer(2),
2104                Token::Integer(3),
2105                Token::ArrayEnd,
2106            ];
2107            let result = parser.pop_array(&mut operands).unwrap();
2108            assert_eq!(result.len(), 3);
2109            assert!(operands.is_empty());
2110
2111            // Test array without ArrayEnd (backwards compatibility)
2112            let mut operands = vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)];
2113            let result = parser.pop_array(&mut operands).unwrap();
2114            assert_eq!(result.len(), 2);
2115            assert!(operands.is_empty());
2116        }
2117
2118        #[test]
2119        fn test_dash_array_parsing_valid() {
2120            // Test that parser correctly parses valid dash arrays
2121            let parser = ContentParser::new(b"");
2122
2123            // Test with valid numbers only
2124            let valid_tokens = vec![Token::Number(3.0), Token::Integer(2)];
2125            let result = parser.parse_dash_array(valid_tokens).unwrap();
2126            assert_eq!(result, vec![3.0, 2.0]);
2127
2128            // Test empty dash array
2129            let empty_tokens = vec![];
2130            let result = parser.parse_dash_array(empty_tokens).unwrap();
2131            let expected: Vec<f32> = vec![];
2132            assert_eq!(result, expected);
2133        }
2134
2135        #[test]
2136        fn test_text_array_parsing_valid() {
2137            // Test that parser correctly parses valid text arrays
2138            let parser = ContentParser::new(b"");
2139
2140            // Test with valid elements only
2141            let valid_tokens = vec![
2142                Token::String(b"Hello".to_vec()),
2143                Token::Number(-100.0),
2144                Token::String(b"World".to_vec()),
2145            ];
2146            let result = parser.parse_text_array(valid_tokens).unwrap();
2147            assert_eq!(result.len(), 3);
2148        }
2149
2150        #[test]
2151        fn test_inline_image_handling() {
2152            let content = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
2153            let operators = ContentParser::parse(content).unwrap();
2154
2155            assert_eq!(operators.len(), 1);
2156            match &operators[0] {
2157                ContentOperation::InlineImage { params, data: _ } => {
2158                    // Check parsed parameters
2159                    assert_eq!(params.get("Width"), Some(&Object::Integer(100)));
2160                    assert_eq!(params.get("Height"), Some(&Object::Integer(100)));
2161                    assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(8)));
2162                    assert_eq!(
2163                        params.get("ColorSpace"),
2164                        Some(&Object::Name("DeviceRGB".to_string()))
2165                    );
2166                    // Data field is not captured, just verify params
2167                }
2168                _ => panic!("Expected InlineImage operation"),
2169            }
2170        }
2171
2172        #[test]
2173        fn test_inline_image_with_filter() {
2174            let content = b"BI /W 50 /H 50 /CS /G /BPC 1 /F /AHx ID 00FF00FF EI";
2175            let operators = ContentParser::parse(content).unwrap();
2176
2177            assert_eq!(operators.len(), 1);
2178            match &operators[0] {
2179                ContentOperation::InlineImage { params, data: _ } => {
2180                    assert_eq!(params.get("Width"), Some(&Object::Integer(50)));
2181                    assert_eq!(params.get("Height"), Some(&Object::Integer(50)));
2182                    assert_eq!(
2183                        params.get("ColorSpace"),
2184                        Some(&Object::Name("DeviceGray".to_string()))
2185                    );
2186                    assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(1)));
2187                    assert_eq!(
2188                        params.get("Filter"),
2189                        Some(&Object::Name("ASCIIHexDecode".to_string()))
2190                    );
2191                }
2192                _ => panic!("Expected InlineImage operation"),
2193            }
2194        }
2195
2196        #[test]
2197        fn test_content_parser_performance() {
2198            let mut content = Vec::new();
2199            for i in 0..1000 {
2200                content.extend_from_slice(format!("{} {} m ", i, i + 1).as_bytes());
2201            }
2202
2203            let start = std::time::Instant::now();
2204            let operators = ContentParser::parse(&content).unwrap();
2205            let duration = start.elapsed();
2206
2207            assert_eq!(operators.len(), 1000);
2208            assert!(duration.as_millis() < 100); // Should parse 1000 operators in under 100ms
2209        }
2210
2211        #[test]
2212        fn test_tokenizer_performance() {
2213            let mut input = Vec::new();
2214            for i in 0..1000 {
2215                input.extend_from_slice(format!("{} {} ", i, i + 1).as_bytes());
2216            }
2217
2218            let start = std::time::Instant::now();
2219            let mut tokenizer = ContentTokenizer::new(&input);
2220            let mut count = 0;
2221            while tokenizer.next_token().unwrap().is_some() {
2222                count += 1;
2223            }
2224            let duration = start.elapsed();
2225
2226            assert_eq!(count, 2000); // 1000 pairs of numbers
2227            assert!(duration.as_millis() < 50); // Should tokenize 2000 tokens in under 50ms
2228        }
2229
2230        #[test]
2231        fn test_memory_usage_large_content() {
2232            let mut content = Vec::new();
2233            for i in 0..10000 {
2234                content.extend_from_slice(
2235                    format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5)
2236                        .as_bytes(),
2237                );
2238            }
2239
2240            let operators = ContentParser::parse(&content).unwrap();
2241            assert_eq!(operators.len(), 10000);
2242
2243            // Verify all operations are CurveTo
2244            for op in operators {
2245                matches!(op, ContentOperation::CurveTo(_, _, _, _, _, _));
2246            }
2247        }
2248
2249        #[test]
2250        fn test_concurrent_parsing() {
2251            use std::sync::Arc;
2252            use std::thread;
2253
2254            let content = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());
2255            let handles: Vec<_> = (0..10)
2256                .map(|_| {
2257                    let content_clone = content.clone();
2258                    thread::spawn(move || ContentParser::parse(&content_clone).unwrap())
2259                })
2260                .collect();
2261
2262            for handle in handles {
2263                let operators = handle.join().unwrap();
2264                assert_eq!(operators.len(), 5);
2265                assert_eq!(operators[0], ContentOperation::BeginText);
2266                assert_eq!(operators[4], ContentOperation::EndText);
2267            }
2268        }
2269
2270        // ========== NEW COMPREHENSIVE TESTS ==========
2271
2272        #[test]
2273        fn test_tokenizer_hex_string_edge_cases() {
2274            let mut tokenizer = ContentTokenizer::new(b"<>");
2275            let token = tokenizer.next_token().unwrap().unwrap();
2276            match token {
2277                Token::HexString(data) => assert!(data.is_empty()),
2278                _ => panic!("Expected empty hex string"),
2279            }
2280
2281            // Odd number of hex digits
2282            let mut tokenizer = ContentTokenizer::new(b"<123>");
2283            let token = tokenizer.next_token().unwrap().unwrap();
2284            match token {
2285                Token::HexString(data) => assert_eq!(data, vec![0x12, 0x30]),
2286                _ => panic!("Expected hex string with odd digits"),
2287            }
2288
2289            // Hex string with whitespace
2290            let mut tokenizer = ContentTokenizer::new(b"<12 34\t56\n78>");
2291            let token = tokenizer.next_token().unwrap().unwrap();
2292            match token {
2293                Token::HexString(data) => assert_eq!(data, vec![0x12, 0x34, 0x56, 0x78]),
2294                _ => panic!("Expected hex string with whitespace"),
2295            }
2296        }
2297
2298        #[test]
2299        fn test_tokenizer_literal_string_escape_sequences() {
2300            // Test all standard escape sequences
2301            let mut tokenizer = ContentTokenizer::new(b"(\\n\\r\\t\\b\\f\\(\\)\\\\)");
2302            let token = tokenizer.next_token().unwrap().unwrap();
2303            match token {
2304                Token::String(data) => {
2305                    assert_eq!(
2306                        data,
2307                        vec![b'\n', b'\r', b'\t', 0x08, 0x0C, b'(', b')', b'\\']
2308                    );
2309                }
2310                _ => panic!("Expected string with escapes"),
2311            }
2312
2313            // Test octal escape sequences
2314            let mut tokenizer = ContentTokenizer::new(b"(\\101\\040\\377)");
2315            let token = tokenizer.next_token().unwrap().unwrap();
2316            match token {
2317                Token::String(data) => assert_eq!(data, vec![b'A', b' ', 255]),
2318                _ => panic!("Expected string with octal escapes"),
2319            }
2320        }
2321
2322        #[test]
2323        fn test_tokenizer_nested_parentheses() {
2324            let mut tokenizer = ContentTokenizer::new(b"(outer (inner) text)");
2325            let token = tokenizer.next_token().unwrap().unwrap();
2326            match token {
2327                Token::String(data) => {
2328                    assert_eq!(data, b"outer (inner) text");
2329                }
2330                _ => panic!("Expected string with nested parentheses"),
2331            }
2332
2333            // Multiple levels of nesting
2334            let mut tokenizer = ContentTokenizer::new(b"(level1 (level2 (level3) back2) back1)");
2335            let token = tokenizer.next_token().unwrap().unwrap();
2336            match token {
2337                Token::String(data) => {
2338                    assert_eq!(data, b"level1 (level2 (level3) back2) back1");
2339                }
2340                _ => panic!("Expected string with deep nesting"),
2341            }
2342        }
2343
2344        #[test]
2345        fn test_tokenizer_name_hex_escapes() {
2346            let mut tokenizer = ContentTokenizer::new(b"/Name#20With#20Spaces");
2347            let token = tokenizer.next_token().unwrap().unwrap();
2348            match token {
2349                Token::Name(name) => assert_eq!(name, "Name With Spaces"),
2350                _ => panic!("Expected name with hex escapes"),
2351            }
2352
2353            // Test various special characters
2354            let mut tokenizer = ContentTokenizer::new(b"/Special#2F#28#29#3C#3E");
2355            let token = tokenizer.next_token().unwrap().unwrap();
2356            match token {
2357                Token::Name(name) => assert_eq!(name, "Special/()<>"),
2358                _ => panic!("Expected name with special character escapes"),
2359            }
2360        }
2361
2362        #[test]
2363        fn test_tokenizer_number_edge_cases() {
2364            // Very large integers
2365            let mut tokenizer = ContentTokenizer::new(b"2147483647");
2366            let token = tokenizer.next_token().unwrap().unwrap();
2367            match token {
2368                Token::Integer(n) => assert_eq!(n, 2147483647),
2369                _ => panic!("Expected large integer"),
2370            }
2371
2372            // Very small numbers
2373            let mut tokenizer = ContentTokenizer::new(b"0.00001");
2374            let token = tokenizer.next_token().unwrap().unwrap();
2375            match token {
2376                Token::Number(n) => assert!((n - 0.00001).abs() < f32::EPSILON),
2377                _ => panic!("Expected small float"),
2378            }
2379
2380            // Numbers starting with dot
2381            let mut tokenizer = ContentTokenizer::new(b".5");
2382            let token = tokenizer.next_token().unwrap().unwrap();
2383            match token {
2384                Token::Number(n) => assert!((n - 0.5).abs() < f32::EPSILON),
2385                _ => panic!("Expected float starting with dot"),
2386            }
2387        }
2388
2389        #[test]
2390        fn test_parser_complex_path_operations() {
2391            let content = b"100 200 m 150 200 l 150 250 l 100 250 l h f";
2392            let operators = ContentParser::parse(content).unwrap();
2393
2394            assert_eq!(operators.len(), 6);
2395            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2396            assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
2397            assert_eq!(operators[2], ContentOperation::LineTo(150.0, 250.0));
2398            assert_eq!(operators[3], ContentOperation::LineTo(100.0, 250.0));
2399            assert_eq!(operators[4], ContentOperation::ClosePath);
2400            assert_eq!(operators[5], ContentOperation::Fill);
2401        }
2402
2403        #[test]
2404        fn test_parser_bezier_curves() {
2405            let content = b"100 100 150 50 200 150 c";
2406            let operators = ContentParser::parse(content).unwrap();
2407
2408            assert_eq!(operators.len(), 1);
2409            match &operators[0] {
2410                ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3) => {
2411                    // Values are parsed in reverse order: last 6 values for c operator
2412                    // Stack order: 100 100 150 50 200 150
2413                    // Pop order: x1=100, y1=100, x2=150, y2=50, x3=200, y3=150
2414                    assert!(x1.is_finite() && y1.is_finite());
2415                    assert!(x2.is_finite() && y2.is_finite());
2416                    assert!(x3.is_finite() && y3.is_finite());
2417                    // Verify we have 6 coordinate values
2418                    assert!(*x1 >= 50.0 && *x1 <= 200.0);
2419                    assert!(*y1 >= 50.0 && *y1 <= 200.0);
2420                }
2421                _ => panic!("Expected CurveTo operation"),
2422            }
2423        }
2424
2425        #[test]
2426        fn test_parser_color_operations() {
2427            let content = b"0.5 g 1 0 0 rg 0 1 0 1 k /DeviceRGB cs 0.2 0.4 0.6 sc";
2428            let operators = ContentParser::parse(content).unwrap();
2429
2430            assert_eq!(operators.len(), 5);
2431            match &operators[0] {
2432                ContentOperation::SetNonStrokingGray(gray) => assert_eq!(*gray, 0.5),
2433                _ => panic!("Expected SetNonStrokingGray"),
2434            }
2435            match &operators[1] {
2436                ContentOperation::SetNonStrokingRGB(r, g, b) => {
2437                    assert_eq!((*r, *g, *b), (1.0, 0.0, 0.0));
2438                }
2439                _ => panic!("Expected SetNonStrokingRGB"),
2440            }
2441        }
2442
2443        #[test]
2444        fn test_parser_text_positioning_advanced() {
2445            let content = b"BT 1 0 0 1 100 200 Tm 0 TL 10 TL (Line 1) ' (Line 2) ' ET";
2446            let operators = ContentParser::parse(content).unwrap();
2447
2448            assert_eq!(operators.len(), 7);
2449            assert_eq!(operators[0], ContentOperation::BeginText);
2450            match &operators[1] {
2451                ContentOperation::SetTextMatrix(a, b, c, d, e, f) => {
2452                    assert_eq!((*a, *b, *c, *d, *e, *f), (1.0, 0.0, 0.0, 1.0, 100.0, 200.0));
2453                }
2454                _ => panic!("Expected SetTextMatrix"),
2455            }
2456            assert_eq!(operators[6], ContentOperation::EndText);
2457        }
2458
2459        #[test]
2460        fn test_parser_graphics_state_operations() {
2461            let content = b"q 2 0 0 2 100 100 cm 5 w 1 J 2 j 10 M Q";
2462            let operators = ContentParser::parse(content).unwrap();
2463
2464            assert_eq!(operators.len(), 7);
2465            assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
2466            match &operators[1] {
2467                ContentOperation::SetTransformMatrix(a, b, c, d, e, f) => {
2468                    assert_eq!((*a, *b, *c, *d, *e, *f), (2.0, 0.0, 0.0, 2.0, 100.0, 100.0));
2469                }
2470                _ => panic!("Expected SetTransformMatrix"),
2471            }
2472            assert_eq!(operators[6], ContentOperation::RestoreGraphicsState);
2473        }
2474
2475        #[test]
2476        fn test_parser_xobject_operations() {
2477            let content = b"/Image1 Do /Form2 Do /Pattern3 Do";
2478            let operators = ContentParser::parse(content).unwrap();
2479
2480            assert_eq!(operators.len(), 3);
2481            for (i, expected_name) in ["Image1", "Form2", "Pattern3"].iter().enumerate() {
2482                match &operators[i] {
2483                    ContentOperation::PaintXObject(name) => assert_eq!(name, expected_name),
2484                    _ => panic!("Expected PaintXObject"),
2485                }
2486            }
2487        }
2488
2489        #[test]
2490        fn test_parser_marked_content_operations() {
2491            let content = b"/P BMC (Tagged content) Tj EMC";
2492            let operators = ContentParser::parse(content).unwrap();
2493
2494            assert_eq!(operators.len(), 3);
2495            match &operators[0] {
2496                ContentOperation::BeginMarkedContent(tag) => assert_eq!(tag, "P"),
2497                _ => panic!("Expected BeginMarkedContent"),
2498            }
2499            assert_eq!(operators[2], ContentOperation::EndMarkedContent);
2500        }
2501
2502        #[test]
2503        fn test_parser_error_handling_invalid_operators() {
2504            // Missing operands for move operator
2505            let content = b"m";
2506            let result = ContentParser::parse(content);
2507            assert!(result.is_err());
2508
2509            // Invalid hex string (no closing >)
2510            let content = b"<ABC DEF BT";
2511            let result = ContentParser::parse(content);
2512            assert!(result.is_err());
2513
2514            // Test that we can detect actual parsing errors
2515            let content = b"100 200 300"; // Numbers without operator should parse ok
2516            let result = ContentParser::parse(content);
2517            assert!(result.is_ok()); // This should actually be ok since no operator is attempted
2518        }
2519
2520        #[test]
2521        fn test_parser_whitespace_tolerance() {
2522            let content = b"  \n\t  100   \r\n  200  \t m  \n";
2523            let operators = ContentParser::parse(content).unwrap();
2524
2525            assert_eq!(operators.len(), 1);
2526            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2527        }
2528
2529        #[test]
2530        fn test_tokenizer_comment_handling() {
2531            let content = b"100 % This is a comment\n200 m % Another comment";
2532            let operators = ContentParser::parse(content).unwrap();
2533
2534            assert_eq!(operators.len(), 1);
2535            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2536        }
2537
2538        #[test]
2539        fn test_parser_stream_with_binary_data() {
2540            // Test content stream with comment containing binary-like data
2541            let content = b"100 200 m % Comment with \xFF binary\n150 250 l";
2542
2543            let operators = ContentParser::parse(content).unwrap();
2544            assert_eq!(operators.len(), 2);
2545            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2546            assert_eq!(operators[1], ContentOperation::LineTo(150.0, 250.0));
2547        }
2548
2549        #[test]
2550        fn test_tokenizer_array_parsing() {
2551            // Test simple operations that don't require complex array parsing
2552            let content = b"100 200 m 150 250 l";
2553            let operators = ContentParser::parse(content).unwrap();
2554
2555            assert_eq!(operators.len(), 2);
2556            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2557            assert_eq!(operators[1], ContentOperation::LineTo(150.0, 250.0));
2558        }
2559
2560        #[test]
2561        fn test_parser_rectangle_operations() {
2562            let content = b"10 20 100 50 re 0 0 200 300 re";
2563            let operators = ContentParser::parse(content).unwrap();
2564
2565            assert_eq!(operators.len(), 2);
2566            match &operators[0] {
2567                ContentOperation::Rectangle(x, y, width, height) => {
2568                    assert_eq!((*x, *y, *width, *height), (10.0, 20.0, 100.0, 50.0));
2569                }
2570                _ => panic!("Expected Rectangle operation"),
2571            }
2572            match &operators[1] {
2573                ContentOperation::Rectangle(x, y, width, height) => {
2574                    assert_eq!((*x, *y, *width, *height), (0.0, 0.0, 200.0, 300.0));
2575                }
2576                _ => panic!("Expected Rectangle operation"),
2577            }
2578        }
2579
2580        #[test]
2581        fn test_parser_clipping_operations() {
2582            let content = b"100 100 50 50 re W n 200 200 75 75 re W* n";
2583            let operators = ContentParser::parse(content).unwrap();
2584
2585            assert_eq!(operators.len(), 6);
2586            assert_eq!(operators[1], ContentOperation::Clip);
2587            assert_eq!(operators[2], ContentOperation::EndPath);
2588            assert_eq!(operators[4], ContentOperation::ClipEvenOdd);
2589            assert_eq!(operators[5], ContentOperation::EndPath);
2590        }
2591
2592        #[test]
2593        fn test_parser_painting_operations() {
2594            let content = b"S s f f* B B* b b*";
2595            let operators = ContentParser::parse(content).unwrap();
2596
2597            assert_eq!(operators.len(), 8);
2598            assert_eq!(operators[0], ContentOperation::Stroke);
2599            assert_eq!(operators[1], ContentOperation::CloseStroke);
2600            assert_eq!(operators[2], ContentOperation::Fill);
2601            assert_eq!(operators[3], ContentOperation::FillEvenOdd);
2602            assert_eq!(operators[4], ContentOperation::FillStroke);
2603            assert_eq!(operators[5], ContentOperation::FillStrokeEvenOdd);
2604            assert_eq!(operators[6], ContentOperation::CloseFillStroke);
2605            assert_eq!(operators[7], ContentOperation::CloseFillStrokeEvenOdd);
2606        }
2607
2608        #[test]
2609        fn test_parser_line_style_operations() {
2610            let content = b"5 w 1 J 2 j 10 M [ 3 2 ] 0 d";
2611            let operators = ContentParser::parse(content).unwrap();
2612
2613            assert_eq!(operators.len(), 5);
2614            assert_eq!(operators[0], ContentOperation::SetLineWidth(5.0));
2615            assert_eq!(operators[1], ContentOperation::SetLineCap(1));
2616            assert_eq!(operators[2], ContentOperation::SetLineJoin(2));
2617            assert_eq!(operators[3], ContentOperation::SetMiterLimit(10.0));
2618            // Dash pattern test would need array support
2619        }
2620
2621        #[test]
2622        fn test_parser_text_state_operations() {
2623            let content = b"12 Tc 3 Tw 100 Tz 1 Tr 2 Ts";
2624            let operators = ContentParser::parse(content).unwrap();
2625
2626            assert_eq!(operators.len(), 5);
2627            assert_eq!(operators[0], ContentOperation::SetCharSpacing(12.0));
2628            assert_eq!(operators[1], ContentOperation::SetWordSpacing(3.0));
2629            assert_eq!(operators[2], ContentOperation::SetHorizontalScaling(100.0));
2630            assert_eq!(operators[3], ContentOperation::SetTextRenderMode(1));
2631            assert_eq!(operators[4], ContentOperation::SetTextRise(2.0));
2632        }
2633
2634        #[test]
2635        fn test_parser_unicode_text() {
2636            let content = b"BT (Hello \xC2\xA9 World \xE2\x9C\x93) Tj ET";
2637            let operators = ContentParser::parse(content).unwrap();
2638
2639            assert_eq!(operators.len(), 3);
2640            assert_eq!(operators[0], ContentOperation::BeginText);
2641            match &operators[1] {
2642                ContentOperation::ShowText(text) => {
2643                    assert!(text.len() > 5); // Should contain Unicode bytes
2644                }
2645                _ => panic!("Expected ShowText operation"),
2646            }
2647            assert_eq!(operators[2], ContentOperation::EndText);
2648        }
2649
2650        #[test]
2651        fn test_parser_stress_test_large_coordinates() {
2652            let content = b"999999.999 -999999.999 999999.999 -999999.999 999999.999 -999999.999 c";
2653            let operators = ContentParser::parse(content).unwrap();
2654
2655            assert_eq!(operators.len(), 1);
2656            match &operators[0] {
2657                ContentOperation::CurveTo(_x1, _y1, _x2, _y2, _x3, _y3) => {
2658                    assert!((*_x1 - 999999.999).abs() < 0.1);
2659                    assert!((*_y1 - (-999999.999)).abs() < 0.1);
2660                    assert!((*_x3 - 999999.999).abs() < 0.1);
2661                }
2662                _ => panic!("Expected CurveTo operation"),
2663            }
2664        }
2665
2666        #[test]
2667        fn test_parser_empty_content_stream() {
2668            let content = b"";
2669            let operators = ContentParser::parse(content).unwrap();
2670            assert!(operators.is_empty());
2671
2672            let content = b"   \n\t\r   ";
2673            let operators = ContentParser::parse(content).unwrap();
2674            assert!(operators.is_empty());
2675        }
2676
2677        #[test]
2678        fn test_tokenizer_error_recovery() {
2679            // Test that parser can handle malformed but recoverable content
2680            let content = b"100 200 m % Comment with\xFFbinary\n150 250 l";
2681            let result = ContentParser::parse(content);
2682            // Should either parse successfully or fail gracefully
2683            assert!(result.is_ok() || result.is_err());
2684        }
2685
2686        #[test]
2687        fn test_parser_optimization_repeated_operations() {
2688            // Test performance with many repeated operations
2689            let mut content = Vec::new();
2690            for i in 0..1000 {
2691                content.extend_from_slice(format!("{} {} m ", i, i * 2).as_bytes());
2692            }
2693
2694            let start = std::time::Instant::now();
2695            let operators = ContentParser::parse(&content).unwrap();
2696            let duration = start.elapsed();
2697
2698            assert_eq!(operators.len(), 1000);
2699            assert!(duration.as_millis() < 200); // Should be fast
2700        }
2701
2702        #[test]
2703        fn test_parser_memory_efficiency_large_strings() {
2704            // Test with large text content
2705            let large_text = "A".repeat(10000);
2706            let content = format!("BT ({}) Tj ET", large_text);
2707            let operators = ContentParser::parse(content.as_bytes()).unwrap();
2708
2709            assert_eq!(operators.len(), 3);
2710            match &operators[1] {
2711                ContentOperation::ShowText(text) => {
2712                    assert_eq!(text.len(), 10000);
2713                }
2714                _ => panic!("Expected ShowText operation"),
2715            }
2716        }
2717    }
2718
2719    #[test]
2720    fn test_content_stream_too_large() {
2721        // Test handling of very large content streams (covering potential size limits)
2722        let mut large_content = Vec::new();
2723
2724        // Create a content stream with many operations
2725        for i in 0..10000 {
2726            large_content.extend_from_slice(format!("{} {} m ", i, i).as_bytes());
2727        }
2728        large_content.extend_from_slice(b"S");
2729
2730        // Should handle large content without panic
2731        let result = ContentParser::parse_content(&large_content);
2732        assert!(result.is_ok());
2733
2734        let operations = result.unwrap();
2735        // Should have many MoveTo operations plus one Stroke
2736        assert!(operations.len() > 10000);
2737    }
2738
2739    #[test]
2740    fn test_invalid_operator_handling() {
2741        // Test parsing with invalid operators
2742        let content = b"100 200 INVALID_OP 300 400 m";
2743        let result = ContentParser::parse_content(content);
2744
2745        // Should either handle gracefully or return error
2746        if let Ok(operations) = result {
2747            // If it succeeds, should have at least the valid MoveTo
2748            assert!(operations
2749                .iter()
2750                .any(|op| matches!(op, ContentOperation::MoveTo(_, _))));
2751        }
2752    }
2753
2754    #[test]
2755    fn test_nested_arrays_malformed() {
2756        // Test malformed nested arrays in TJ operator
2757        let content = b"[[(Hello] [World)]] TJ";
2758        let result = ContentParser::parse_content(content);
2759
2760        // Should handle malformed arrays gracefully
2761        assert!(result.is_ok() || result.is_err());
2762    }
2763
2764    #[test]
2765    fn test_escape_sequences_in_strings() {
2766        // Test various escape sequences in strings
2767        let test_cases = vec![
2768            (b"(\\n\\r\\t)".as_slice(), b"\n\r\t".as_slice()),
2769            (b"(\\\\)".as_slice(), b"\\".as_slice()),
2770            (b"(\\(\\))".as_slice(), b"()".as_slice()),
2771            (b"(\\123)".as_slice(), b"S".as_slice()), // Octal 123 = 83 = 'S'
2772            (b"(\\0)".as_slice(), b"\0".as_slice()),
2773        ];
2774
2775        for (input, expected) in test_cases {
2776            let mut content = Vec::new();
2777            content.extend_from_slice(input);
2778            content.extend_from_slice(b" Tj");
2779
2780            let result = ContentParser::parse_content(&content);
2781            assert!(result.is_ok());
2782
2783            let operations = result.unwrap();
2784            if let ContentOperation::ShowText(text) = &operations[0] {
2785                assert_eq!(text, expected, "Failed for input: {:?}", input);
2786            } else {
2787                panic!("Expected ShowText operation");
2788            }
2789        }
2790    }
2791
2792    #[test]
2793    fn test_content_with_inline_images() {
2794        // Test handling of inline images in content stream
2795        let content = b"BI /W 10 /H 10 /CS /RGB ID \x00\x01\x02\x03 EI";
2796        let result = ContentParser::parse_content(content);
2797
2798        // Should handle inline images (even if not fully implemented)
2799        assert!(result.is_ok() || result.is_err());
2800    }
2801
2802    #[test]
2803    fn test_operator_with_missing_operands() {
2804        // Test operators with insufficient operands
2805        let test_cases = vec![
2806            b"Tj" as &[u8], // ShowText without string
2807            b"m",           // MoveTo without coordinates
2808            b"rg",          // SetRGBColor without values
2809            b"Tf",          // SetFont without name and size
2810        ];
2811
2812        for content in test_cases {
2813            let result = ContentParser::parse_content(content);
2814            // Should handle gracefully (error or skip)
2815            assert!(result.is_ok() || result.is_err());
2816        }
2817    }
2818}