oxidize_pdf/parser/
content.rs

1//! PDF Content Stream Parser - Complete support for PDF graphics operators
2//!
3//! This module implements comprehensive parsing of PDF content streams according to the PDF specification.
4//! Content streams contain the actual drawing instructions (operators) that render text, graphics, and images
5//! on PDF pages.
6//!
7//! # Overview
8//!
9//! Content streams are sequences of PDF operators that describe:
10//! - Text positioning and rendering
11//! - Path construction and painting
12//! - Color and graphics state management
13//! - Image and XObject placement
14//! - Coordinate transformations
15//!
16//! # Architecture
17//!
18//! The parser is divided into two main components:
19//! - `ContentTokenizer`: Low-level tokenization of content stream bytes
20//! - `ContentParser`: High-level parsing of tokens into structured operations
21//!
22//! # Example
23//!
24//! ```rust,no_run
25//! use oxidize_pdf::parser::content::{ContentParser, ContentOperation};
26//!
27//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
28//! // Parse a content stream
29//! let content_stream = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
30//! let operations = ContentParser::parse_content(content_stream)?;
31//!
32//! // Process operations
33//! for op in operations {
34//!     match op {
35//!         ContentOperation::BeginText => println!("Start text object"),
36//!         ContentOperation::SetFont(name, size) => println!("Font: {} at {}", name, size),
37//!         ContentOperation::ShowText(text) => println!("Text: {:?}", text),
38//!         _ => {}
39//!     }
40//! }
41//! # Ok(())
42//! # }
43//! ```
44//!
45//! # Supported Operators
46//!
47//! This parser supports all standard PDF operators including:
48//! - Text operators (BT, ET, Tj, TJ, Tf, Td, etc.)
49//! - Graphics state operators (q, Q, cm, w, J, etc.)
50//! - Path construction operators (m, l, c, re, h)
51//! - Path painting operators (S, f, B, n, etc.)
52//! - Color operators (g, rg, k, cs, scn, etc.)
53//! - XObject operators (Do)
54//! - Marked content operators (BMC, BDC, EMC, etc.)
55
56use super::{ParseError, ParseResult};
57use crate::objects::Object;
58use std::collections::HashMap;
59
60/// Represents a single operator in a PDF content stream.
61///
62/// Each variant corresponds to a specific PDF operator and carries the associated
63/// operands. These operations form a complete instruction set for rendering PDF content.
64///
65/// # Categories
66///
67/// Operations are grouped into several categories:
68/// - **Text Object**: BeginText, EndText
69/// - **Text State**: Font, spacing, scaling, rendering mode
70/// - **Text Positioning**: Matrix transforms, moves, line advances
71/// - **Text Showing**: Display text with various formatting
72/// - **Graphics State**: Save/restore, transforms, line properties
73/// - **Path Construction**: Move, line, curve, rectangle operations
74/// - **Path Painting**: Stroke, fill, clipping operations
75/// - **Color**: RGB, CMYK, grayscale, and color space operations
76/// - **XObject**: External graphics and form placement
77/// - **Marked Content**: Semantic tagging for accessibility
78///
79/// # Example
80///
81/// ```rust
82/// use oxidize_pdf::parser::content::{ContentOperation};
83///
84/// // Text operation
85/// let op1 = ContentOperation::ShowText(b"Hello".to_vec());
86///
87/// // Graphics operation
88/// let op2 = ContentOperation::SetLineWidth(2.0);
89///
90/// // Path operation
91/// let op3 = ContentOperation::Rectangle(10.0, 10.0, 100.0, 50.0);
92/// ```
93#[derive(Debug, Clone, PartialEq)]
94pub enum ContentOperation {
95    // Text object operators
96    /// Begin a text object (BT operator).
97    /// All text showing operations must occur within a text object.
98    BeginText,
99
100    /// End a text object (ET operator).
101    /// Closes the current text object started with BeginText.
102    EndText,
103
104    // Text state operators
105    /// Set character spacing (Tc operator).
106    /// Additional space between characters in unscaled text units.
107    SetCharSpacing(f32),
108
109    /// Set word spacing (Tw operator).
110    /// Additional space for ASCII space character (0x20) in unscaled text units.
111    SetWordSpacing(f32),
112
113    /// Set horizontal text scaling (Tz operator).
114    /// Percentage of normal width (100 = normal).
115    SetHorizontalScaling(f32),
116
117    /// Set text leading (TL operator).
118    /// Vertical distance between baselines for T* operator.
119    SetLeading(f32),
120
121    /// Set font and size (Tf operator).
122    /// Font name must match a key in the Resources/Font dictionary.
123    SetFont(String, f32),
124
125    /// Set text rendering mode (Tr operator).
126    /// 0=fill, 1=stroke, 2=fill+stroke, 3=invisible, 4=fill+clip, 5=stroke+clip, 6=fill+stroke+clip, 7=clip
127    SetTextRenderMode(i32),
128
129    /// Set text rise (Ts operator).
130    /// Vertical displacement for superscripts/subscripts in text units.
131    SetTextRise(f32),
132
133    // Text positioning operators
134    /// Move text position (Td operator).
135    /// Translates the text matrix by (tx, ty).
136    MoveText(f32, f32),
137
138    /// Move text position and set leading (TD operator).
139    /// Equivalent to: -ty TL tx ty Td
140    MoveTextSetLeading(f32, f32),
141
142    /// Set text matrix directly (Tm operator).
143    /// Parameters: [a, b, c, d, e, f] for transformation matrix.
144    SetTextMatrix(f32, f32, f32, f32, f32, f32),
145
146    /// Move to start of next line (T* operator).
147    /// Uses the current leading value set with TL.
148    NextLine,
149
150    // Text showing operators
151    /// Show text string (Tj operator).
152    /// The bytes are encoded according to the current font's encoding.
153    ShowText(Vec<u8>),
154
155    /// Show text with individual positioning (TJ operator).
156    /// Array elements can be strings or position adjustments.
157    ShowTextArray(Vec<TextElement>),
158
159    /// Move to next line and show text (' operator).
160    /// Equivalent to: T* string Tj
161    NextLineShowText(Vec<u8>),
162
163    /// Set spacing, move to next line, and show text (" operator).
164    /// Equivalent to: word_spacing Tw char_spacing Tc string '
165    SetSpacingNextLineShowText(f32, f32, Vec<u8>),
166
167    // Graphics state operators
168    /// Save current graphics state (q operator).
169    /// Pushes the entire graphics state onto a stack.
170    SaveGraphicsState,
171
172    /// Restore graphics state (Q operator).
173    /// Pops the graphics state from the stack.
174    RestoreGraphicsState,
175
176    /// Concatenate matrix to current transformation matrix (cm operator).
177    /// Modifies the CTM: CTM' = CTM × [a b c d e f]
178    SetTransformMatrix(f32, f32, f32, f32, f32, f32),
179
180    /// Set line width (w operator) in user space units.
181    SetLineWidth(f32),
182
183    /// Set line cap style (J operator).
184    /// 0=butt cap, 1=round cap, 2=projecting square cap
185    SetLineCap(i32),
186
187    /// Set line join style (j operator).
188    /// 0=miter join, 1=round join, 2=bevel join
189    SetLineJoin(i32),
190
191    /// Set miter limit (M operator).
192    /// Maximum ratio of miter length to line width.
193    SetMiterLimit(f32),
194
195    /// Set dash pattern (d operator).
196    /// Array of dash/gap lengths and starting phase.
197    SetDashPattern(Vec<f32>, f32),
198
199    /// Set rendering intent (ri operator).
200    /// Color rendering intent: /AbsoluteColorimetric, /RelativeColorimetric, /Saturation, /Perceptual
201    SetIntent(String),
202
203    /// Set flatness tolerance (i operator).
204    /// Maximum error when rendering curves as line segments.
205    SetFlatness(f32),
206
207    /// Set graphics state from parameter dictionary (gs operator).
208    /// References ExtGState resource dictionary.
209    SetGraphicsStateParams(String),
210
211    // Path construction operators
212    /// Begin new subpath at point (m operator).
213    MoveTo(f32, f32),
214
215    /// Append straight line segment (l operator).
216    LineTo(f32, f32),
217
218    /// Append cubic Bézier curve (c operator).
219    /// Control points: (x1,y1), (x2,y2), endpoint: (x3,y3)
220    CurveTo(f32, f32, f32, f32, f32, f32),
221
222    /// Append cubic Bézier curve with first control point = current point (v operator).
223    CurveToV(f32, f32, f32, f32),
224
225    /// Append cubic Bézier curve with second control point = endpoint (y operator).
226    CurveToY(f32, f32, f32, f32),
227
228    /// Close current subpath (h operator).
229    /// Appends straight line to starting point.
230    ClosePath,
231
232    /// Append rectangle as complete subpath (re operator).
233    /// Parameters: x, y, width, height
234    Rectangle(f32, f32, f32, f32),
235
236    // Path painting operators
237    /// Stroke the path (S operator).
238    Stroke,
239
240    /// Close and stroke the path (s operator).
241    /// Equivalent to: h S
242    CloseStroke,
243
244    /// Fill the path using nonzero winding rule (f or F operator).
245    Fill,
246
247    /// Fill the path using even-odd rule (f* operator).
248    FillEvenOdd,
249
250    /// Fill then stroke the path (B operator).
251    /// Uses nonzero winding rule.
252    FillStroke,
253
254    /// Fill then stroke using even-odd rule (B* operator).
255    FillStrokeEvenOdd,
256
257    /// Close, fill, and stroke the path (b operator).
258    /// Equivalent to: h B
259    CloseFillStroke,
260
261    /// Close, fill, and stroke using even-odd rule (b* operator).
262    CloseFillStrokeEvenOdd,
263
264    /// End path without filling or stroking (n operator).
265    /// Used primarily before clipping.
266    EndPath,
267
268    // Clipping path operators
269    Clip,        // W
270    ClipEvenOdd, // W*
271
272    // Color operators
273    /// Set stroking color space (CS operator).
274    /// References ColorSpace resource dictionary.
275    SetStrokingColorSpace(String),
276
277    /// Set non-stroking color space (cs operator).
278    /// References ColorSpace resource dictionary.
279    SetNonStrokingColorSpace(String),
280
281    /// Set stroking color (SC, SCN operators).
282    /// Number of components depends on current color space.
283    SetStrokingColor(Vec<f32>),
284
285    /// Set non-stroking color (sc, scn operators).
286    /// Number of components depends on current color space.
287    SetNonStrokingColor(Vec<f32>),
288
289    /// Set stroking color to DeviceGray (G operator).
290    /// 0.0 = black, 1.0 = white
291    SetStrokingGray(f32),
292
293    /// Set non-stroking color to DeviceGray (g operator).
294    SetNonStrokingGray(f32),
295
296    /// Set stroking color to DeviceRGB (RG operator).
297    /// Components range from 0.0 to 1.0.
298    SetStrokingRGB(f32, f32, f32),
299
300    /// Set non-stroking color to DeviceRGB (rg operator).
301    SetNonStrokingRGB(f32, f32, f32),
302
303    /// Set stroking color to DeviceCMYK (K operator).
304    SetStrokingCMYK(f32, f32, f32, f32),
305
306    /// Set non-stroking color to DeviceCMYK (k operator).
307    SetNonStrokingCMYK(f32, f32, f32, f32),
308
309    // Shading operators
310    ShadingFill(String), // sh
311
312    // Inline image operators
313    /// Begin inline image (BI operator)
314    BeginInlineImage,
315    /// Inline image with parsed dictionary and data
316    InlineImage {
317        /// Image parameters (width, height, colorspace, etc.)
318        params: HashMap<String, Object>,
319        /// Raw image data
320        data: Vec<u8>,
321    },
322
323    // XObject operators
324    /// Paint external object (Do operator).
325    /// References XObject resource dictionary (images, forms).
326    PaintXObject(String),
327
328    // Marked content operators
329    BeginMarkedContent(String),                                   // BMC
330    BeginMarkedContentWithProps(String, HashMap<String, String>), // BDC
331    EndMarkedContent,                                             // EMC
332    DefineMarkedContentPoint(String),                             // MP
333    DefineMarkedContentPointWithProps(String, HashMap<String, String>), // DP
334
335    // Compatibility operators
336    BeginCompatibility, // BX
337    EndCompatibility,   // EX
338}
339
/// One element of the array operand of a TJ (`ShowTextArray`) operation.
///
/// The TJ operator takes an array that mixes strings with numeric position
/// adjustments, which gives fine control over the spacing between glyphs.
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::parser::content::{TextElement, ContentOperation};
///
/// // TJ array: [(Hello) -50 (World)]
/// let tj_array = vec![
///     TextElement::Text(b"Hello".to_vec()),
///     TextElement::Spacing(-50.0), // Shift "World" right by 50/1000 text space units
///     TextElement::Text(b"World".to_vec()),
/// ];
/// let op = ContentOperation::ShowTextArray(tj_array);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// Text string to show
    Text(Vec<u8>),
    /// Position adjustment in thousandths of a unit of text space.
    ///
    /// The amount is subtracted from the current text position. In horizontal
    /// writing a negative value therefore moves the following glyph to the
    /// right (widening the gap), while a positive value moves it to the left
    /// (tightening the gap).
    Spacing(f32),
}
366
367/// Token types in content streams
368#[derive(Debug, Clone, PartialEq)]
369pub(super) enum Token {
370    Number(f32),
371    Integer(i32),
372    String(Vec<u8>),
373    HexString(Vec<u8>),
374    Name(String),
375    Operator(String),
376    ArrayStart,
377    ArrayEnd,
378    DictStart,
379    DictEnd,
380}
381
/// Low-level tokenizer that walks over the raw bytes of a content stream.
pub struct ContentTokenizer<'a> {
    /// Bytes of the content stream being tokenized.
    input: &'a [u8],
    /// Index into `input` of the next byte to examine.
    position: usize,
}
387
388impl<'a> ContentTokenizer<'a> {
389    /// Create a new tokenizer for the given input
390    pub fn new(input: &'a [u8]) -> Self {
391        Self { input, position: 0 }
392    }
393
394    /// Get the next token from the stream
395    pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
396        self.skip_whitespace();
397
398        if self.position >= self.input.len() {
399            return Ok(None);
400        }
401
402        let ch = self.input[self.position];
403
404        match ch {
405            // Numbers
406            b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
407
408            // Strings
409            b'(' => self.read_literal_string(),
410            b'<' => {
411                if self.peek_next() == Some(b'<') {
412                    self.position += 2;
413                    Ok(Some(Token::DictStart))
414                } else {
415                    self.read_hex_string()
416                }
417            }
418            b'>' => {
419                if self.peek_next() == Some(b'>') {
420                    self.position += 2;
421                    Ok(Some(Token::DictEnd))
422                } else {
423                    Err(ParseError::SyntaxError {
424                        position: self.position,
425                        message: "Unexpected '>'".to_string(),
426                    })
427                }
428            }
429
430            // Arrays
431            b'[' => {
432                self.position += 1;
433                Ok(Some(Token::ArrayStart))
434            }
435            b']' => {
436                self.position += 1;
437                Ok(Some(Token::ArrayEnd))
438            }
439
440            // Names
441            b'/' => self.read_name(),
442
443            // Skip semicolons (corrupted content recovery)
444            b';' => {
445                self.position += 1;
446                self.next_token() // Recursively get next valid token
447            }
448
449            // Operators or other tokens
450            _ => self.read_operator(),
451        }
452    }
453
454    fn skip_whitespace(&mut self) {
455        while self.position < self.input.len() {
456            match self.input[self.position] {
457                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
458                b'%' => self.skip_comment(),
459                _ => break,
460            }
461        }
462    }
463
464    fn skip_comment(&mut self) {
465        while self.position < self.input.len() && self.input[self.position] != b'\n' {
466            self.position += 1;
467        }
468    }
469
470    fn peek_next(&self) -> Option<u8> {
471        if self.position + 1 < self.input.len() {
472            Some(self.input[self.position + 1])
473        } else {
474            None
475        }
476    }
477
478    fn read_number(&mut self) -> ParseResult<Option<Token>> {
479        let start = self.position;
480        let mut has_dot = false;
481
482        // Handle optional sign
483        if self.position < self.input.len()
484            && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
485        {
486            self.position += 1;
487        }
488
489        // Read digits and optional decimal point
490        while self.position < self.input.len() {
491            match self.input[self.position] {
492                b'0'..=b'9' => self.position += 1,
493                b'.' if !has_dot => {
494                    has_dot = true;
495                    self.position += 1;
496                }
497                _ => break,
498            }
499        }
500
501        let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
502            ParseError::SyntaxError {
503                position: start,
504                message: "Invalid number format".to_string(),
505            }
506        })?;
507
508        if has_dot {
509            let value = num_str
510                .parse::<f32>()
511                .map_err(|_| ParseError::SyntaxError {
512                    position: start,
513                    message: "Invalid float number".to_string(),
514                })?;
515            Ok(Some(Token::Number(value)))
516        } else {
517            let value = num_str
518                .parse::<i32>()
519                .map_err(|_| ParseError::SyntaxError {
520                    position: start,
521                    message: "Invalid integer number".to_string(),
522                })?;
523            Ok(Some(Token::Integer(value)))
524        }
525    }
526
527    fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
528        self.position += 1; // Skip opening '('
529        let mut result = Vec::new();
530        let mut paren_depth = 1;
531        let mut escape = false;
532
533        while self.position < self.input.len() && paren_depth > 0 {
534            let ch = self.input[self.position];
535            self.position += 1;
536
537            if escape {
538                match ch {
539                    b'n' => result.push(b'\n'),
540                    b'r' => result.push(b'\r'),
541                    b't' => result.push(b'\t'),
542                    b'b' => result.push(b'\x08'),
543                    b'f' => result.push(b'\x0C'),
544                    b'(' => result.push(b'('),
545                    b')' => result.push(b')'),
546                    b'\\' => result.push(b'\\'),
547                    b'0'..=b'7' => {
548                        // Octal escape sequence
549                        self.position -= 1;
550                        let octal_value = self.read_octal_escape()?;
551                        result.push(octal_value);
552                    }
553                    _ => result.push(ch), // Unknown escape, treat as literal
554                }
555                escape = false;
556            } else {
557                match ch {
558                    b'\\' => escape = true,
559                    b'(' => {
560                        paren_depth += 1;
561                        result.push(ch);
562                    }
563                    b')' => {
564                        paren_depth -= 1;
565                        if paren_depth > 0 {
566                            result.push(ch);
567                        }
568                    }
569                    _ => result.push(ch),
570                }
571            }
572        }
573
574        Ok(Some(Token::String(result)))
575    }
576
577    fn read_octal_escape(&mut self) -> ParseResult<u8> {
578        let mut value = 0u8;
579        let mut count = 0;
580
581        while count < 3 && self.position < self.input.len() {
582            match self.input[self.position] {
583                b'0'..=b'7' => {
584                    value = value * 8 + (self.input[self.position] - b'0');
585                    self.position += 1;
586                    count += 1;
587                }
588                _ => break,
589            }
590        }
591
592        Ok(value)
593    }
594
595    fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
596        self.position += 1; // Skip opening '<'
597        let mut result = Vec::new();
598        let mut nibble = None;
599
600        while self.position < self.input.len() {
601            let ch = self.input[self.position];
602
603            match ch {
604                b'>' => {
605                    self.position += 1;
606                    // Handle odd number of hex digits
607                    if let Some(n) = nibble {
608                        result.push(n << 4);
609                    }
610                    return Ok(Some(Token::HexString(result)));
611                }
612                b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
613                    let digit = if ch <= b'9' {
614                        ch - b'0'
615                    } else if ch <= b'F' {
616                        ch - b'A' + 10
617                    } else {
618                        ch - b'a' + 10
619                    };
620
621                    if let Some(n) = nibble {
622                        result.push((n << 4) | digit);
623                        nibble = None;
624                    } else {
625                        nibble = Some(digit);
626                    }
627                    self.position += 1;
628                }
629                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
630                    // Skip whitespace in hex strings
631                    self.position += 1;
632                }
633                _ => {
634                    return Err(ParseError::SyntaxError {
635                        position: self.position,
636                        message: format!("Invalid character in hex string: {:?}", ch as char),
637                    });
638                }
639            }
640        }
641
642        Err(ParseError::SyntaxError {
643            position: self.position,
644            message: "Unterminated hex string".to_string(),
645        })
646    }
647
648    fn read_name(&mut self) -> ParseResult<Option<Token>> {
649        self.position += 1; // Skip '/'
650        let start = self.position;
651
652        while self.position < self.input.len() {
653            let ch = self.input[self.position];
654            match ch {
655                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
656                | b']' | b'{' | b'}' | b'/' | b'%' => break,
657                b'#' => {
658                    // Handle hex escape in name
659                    self.position += 1;
660                    if self.position + 1 < self.input.len() {
661                        self.position += 2;
662                    }
663                }
664                _ => self.position += 1,
665            }
666        }
667
668        let name_bytes = &self.input[start..self.position];
669        let name = self.decode_name(name_bytes)?;
670        Ok(Some(Token::Name(name)))
671    }
672
673    fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
674        let mut result = Vec::new();
675        let mut i = 0;
676
677        while i < bytes.len() {
678            if bytes[i] == b'#' && i + 2 < bytes.len() {
679                // Hex escape
680                let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
681                    ParseError::SyntaxError {
682                        position: self.position,
683                        message: "Invalid hex escape in name".to_string(),
684                    }
685                })?;
686                let value =
687                    u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
688                        position: self.position,
689                        message: "Invalid hex escape in name".to_string(),
690                    })?;
691                result.push(value);
692                i += 3;
693            } else {
694                result.push(bytes[i]);
695                i += 1;
696            }
697        }
698
699        String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
700            position: self.position,
701            message: "Invalid UTF-8 in name".to_string(),
702        })
703    }
704
705    fn read_operator(&mut self) -> ParseResult<Option<Token>> {
706        let start = self.position;
707
708        while self.position < self.input.len() {
709            let ch = self.input[self.position];
710            match ch {
711                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
712                | b']' | b'{' | b'}' | b'/' | b'%' | b';' => break,
713                _ => self.position += 1,
714            }
715        }
716
717        let op_bytes = &self.input[start..self.position];
718        let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
719            position: start,
720            message: "Invalid operator".to_string(),
721        })?;
722
723        Ok(Some(Token::Operator(op.to_string())))
724    }
725}
726
727/// High-level content stream parser.
728///
729/// Converts tokenized content streams into structured `ContentOperation` values.
730/// This parser handles the operand stack and operator parsing according to PDF specifications.
731///
732/// # Usage
733///
734/// The parser is typically used through its static methods:
735///
736/// ```rust
737/// use oxidize_pdf::parser::content::ContentParser;
738///
739/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
740/// let content = b"q 1 0 0 1 50 50 cm 100 100 200 150 re S Q";
741/// let operations = ContentParser::parse(content)?;
742/// # Ok(())
743/// # }
744/// ```
745pub struct ContentParser {
746    tokens: Vec<Token>,
747    position: usize,
748}
749
750impl ContentParser {
751    /// Create a new content parser
752    pub fn new(_content: &[u8]) -> Self {
753        Self {
754            tokens: Vec::new(),
755            position: 0,
756        }
757    }
758
759    /// Parse a content stream into a vector of operators.
760    ///
761    /// This is a convenience method that creates a parser and processes the entire stream.
762    ///
763    /// # Arguments
764    ///
765    /// * `content` - Raw content stream bytes (may be compressed)
766    ///
767    /// # Returns
768    ///
769    /// A vector of parsed `ContentOperation` values in the order they appear.
770    ///
771    /// # Errors
772    ///
773    /// Returns an error if:
774    /// - Invalid operator syntax is encountered
775    /// - Operators have incorrect number/type of operands
776    /// - Unknown operators are found
777    ///
778    /// # Example
779    ///
780    /// ```rust
781    /// use oxidize_pdf::parser::content::{ContentParser, ContentOperation};
782    ///
783    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
784    /// let content = b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET";
785    /// let operations = ContentParser::parse(content)?;
786    ///
787    /// assert_eq!(operations.len(), 5);
788    /// assert!(matches!(operations[0], ContentOperation::BeginText));
789    /// # Ok(())
790    /// # }
791    /// ```
792    pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
793        Self::parse_content(content)
794    }
795
796    /// Parse a content stream into a vector of operators.
797    ///
798    /// This method tokenizes the input and converts it to operations.
799    /// It handles the PDF postfix notation where operands precede operators.
800    pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
801        let mut tokenizer = ContentTokenizer::new(content);
802        let mut tokens = Vec::new();
803
804        // Tokenize the entire stream
805        while let Some(token) = tokenizer.next_token()? {
806            tokens.push(token);
807        }
808
809        let mut parser = Self {
810            tokens,
811            position: 0,
812        };
813
814        parser.parse_operators()
815    }
816
817    fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
818        let mut operators = Vec::new();
819        let mut operand_stack: Vec<Token> = Vec::new();
820
821        while self.position < self.tokens.len() {
822            let token = self.tokens[self.position].clone();
823            self.position += 1;
824
825            match &token {
826                Token::Operator(op) => {
827                    let operator = self.parse_operator(op, &mut operand_stack)?;
828                    operators.push(operator);
829                }
830                _ => {
831                    // Not an operator, push to operand stack
832                    operand_stack.push(token);
833                }
834            }
835        }
836
837        Ok(operators)
838    }
839
840    fn parse_operator(
841        &mut self,
842        op: &str,
843        operands: &mut Vec<Token>,
844    ) -> ParseResult<ContentOperation> {
845        let operator = match op {
846            // Text object operators
847            "BT" => ContentOperation::BeginText,
848            "ET" => ContentOperation::EndText,
849
850            // Text state operators
851            "Tc" => {
852                let spacing = self.pop_number(operands)?;
853                ContentOperation::SetCharSpacing(spacing)
854            }
855            "Tw" => {
856                let spacing = self.pop_number(operands)?;
857                ContentOperation::SetWordSpacing(spacing)
858            }
859            "Tz" => {
860                let scale = self.pop_number(operands)?;
861                ContentOperation::SetHorizontalScaling(scale)
862            }
863            "TL" => {
864                let leading = self.pop_number(operands)?;
865                ContentOperation::SetLeading(leading)
866            }
867            "Tf" => {
868                let size = self.pop_number(operands)?;
869                let font = self.pop_name(operands)?;
870                ContentOperation::SetFont(font, size)
871            }
872            "Tr" => {
873                let mode = self.pop_integer(operands)?;
874                ContentOperation::SetTextRenderMode(mode)
875            }
876            "Ts" => {
877                let rise = self.pop_number(operands)?;
878                ContentOperation::SetTextRise(rise)
879            }
880
881            // Text positioning operators
882            "Td" => {
883                let ty = self.pop_number(operands)?;
884                let tx = self.pop_number(operands)?;
885                ContentOperation::MoveText(tx, ty)
886            }
887            "TD" => {
888                let ty = self.pop_number(operands)?;
889                let tx = self.pop_number(operands)?;
890                ContentOperation::MoveTextSetLeading(tx, ty)
891            }
892            "Tm" => {
893                let f = self.pop_number(operands)?;
894                let e = self.pop_number(operands)?;
895                let d = self.pop_number(operands)?;
896                let c = self.pop_number(operands)?;
897                let b = self.pop_number(operands)?;
898                let a = self.pop_number(operands)?;
899                ContentOperation::SetTextMatrix(a, b, c, d, e, f)
900            }
901            "T*" => ContentOperation::NextLine,
902
903            // Text showing operators
904            "Tj" => {
905                let text = self.pop_string(operands)?;
906                ContentOperation::ShowText(text)
907            }
908            "TJ" => {
909                let array = self.pop_array(operands)?;
910                let elements = self.parse_text_array(array)?;
911                ContentOperation::ShowTextArray(elements)
912            }
913            "'" => {
914                let text = self.pop_string(operands)?;
915                ContentOperation::NextLineShowText(text)
916            }
917            "\"" => {
918                let text = self.pop_string(operands)?;
919                let aw = self.pop_number(operands)?;
920                let ac = self.pop_number(operands)?;
921                ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
922            }
923
924            // Graphics state operators
925            "q" => ContentOperation::SaveGraphicsState,
926            "Q" => ContentOperation::RestoreGraphicsState,
927            "cm" => {
928                let f = self.pop_number(operands)?;
929                let e = self.pop_number(operands)?;
930                let d = self.pop_number(operands)?;
931                let c = self.pop_number(operands)?;
932                let b = self.pop_number(operands)?;
933                let a = self.pop_number(operands)?;
934                ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
935            }
936            "w" => {
937                let width = self.pop_number(operands)?;
938                ContentOperation::SetLineWidth(width)
939            }
940            "J" => {
941                let cap = self.pop_integer(operands)?;
942                ContentOperation::SetLineCap(cap)
943            }
944            "j" => {
945                let join = self.pop_integer(operands)?;
946                ContentOperation::SetLineJoin(join)
947            }
948            "M" => {
949                let limit = self.pop_number(operands)?;
950                ContentOperation::SetMiterLimit(limit)
951            }
952            "d" => {
953                let phase = self.pop_number(operands)?;
954                let array = self.pop_array(operands)?;
955                let pattern = self.parse_dash_array(array)?;
956                ContentOperation::SetDashPattern(pattern, phase)
957            }
958            "ri" => {
959                let intent = self.pop_name(operands)?;
960                ContentOperation::SetIntent(intent)
961            }
962            "i" => {
963                let flatness = self.pop_number(operands)?;
964                ContentOperation::SetFlatness(flatness)
965            }
966            "gs" => {
967                let name = self.pop_name(operands)?;
968                ContentOperation::SetGraphicsStateParams(name)
969            }
970
971            // Path construction operators
972            "m" => {
973                let y = self.pop_number(operands)?;
974                let x = self.pop_number(operands)?;
975                ContentOperation::MoveTo(x, y)
976            }
977            "l" => {
978                let y = self.pop_number(operands)?;
979                let x = self.pop_number(operands)?;
980                ContentOperation::LineTo(x, y)
981            }
982            "c" => {
983                let y3 = self.pop_number(operands)?;
984                let x3 = self.pop_number(operands)?;
985                let y2 = self.pop_number(operands)?;
986                let x2 = self.pop_number(operands)?;
987                let y1 = self.pop_number(operands)?;
988                let x1 = self.pop_number(operands)?;
989                ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
990            }
991            "v" => {
992                let y3 = self.pop_number(operands)?;
993                let x3 = self.pop_number(operands)?;
994                let y2 = self.pop_number(operands)?;
995                let x2 = self.pop_number(operands)?;
996                ContentOperation::CurveToV(x2, y2, x3, y3)
997            }
998            "y" => {
999                let y3 = self.pop_number(operands)?;
1000                let x3 = self.pop_number(operands)?;
1001                let y1 = self.pop_number(operands)?;
1002                let x1 = self.pop_number(operands)?;
1003                ContentOperation::CurveToY(x1, y1, x3, y3)
1004            }
1005            "h" => ContentOperation::ClosePath,
1006            "re" => {
1007                let height = self.pop_number(operands)?;
1008                let width = self.pop_number(operands)?;
1009                let y = self.pop_number(operands)?;
1010                let x = self.pop_number(operands)?;
1011                ContentOperation::Rectangle(x, y, width, height)
1012            }
1013
1014            // Path painting operators
1015            "S" => ContentOperation::Stroke,
1016            "s" => ContentOperation::CloseStroke,
1017            "f" | "F" => ContentOperation::Fill,
1018            "f*" => ContentOperation::FillEvenOdd,
1019            "B" => ContentOperation::FillStroke,
1020            "B*" => ContentOperation::FillStrokeEvenOdd,
1021            "b" => ContentOperation::CloseFillStroke,
1022            "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1023            "n" => ContentOperation::EndPath,
1024
1025            // Clipping path operators
1026            "W" => ContentOperation::Clip,
1027            "W*" => ContentOperation::ClipEvenOdd,
1028
1029            // Color operators
1030            "CS" => {
1031                let name = self.pop_name(operands)?;
1032                ContentOperation::SetStrokingColorSpace(name)
1033            }
1034            "cs" => {
1035                let name = self.pop_name(operands)?;
1036                ContentOperation::SetNonStrokingColorSpace(name)
1037            }
1038            "SC" | "SCN" => {
1039                let components = self.pop_color_components(operands)?;
1040                ContentOperation::SetStrokingColor(components)
1041            }
1042            "sc" | "scn" => {
1043                let components = self.pop_color_components(operands)?;
1044                ContentOperation::SetNonStrokingColor(components)
1045            }
1046            "G" => {
1047                let gray = self.pop_number(operands)?;
1048                ContentOperation::SetStrokingGray(gray)
1049            }
1050            "g" => {
1051                let gray = self.pop_number(operands)?;
1052                ContentOperation::SetNonStrokingGray(gray)
1053            }
1054            "RG" => {
1055                let b = self.pop_number(operands)?;
1056                let g = self.pop_number(operands)?;
1057                let r = self.pop_number(operands)?;
1058                ContentOperation::SetStrokingRGB(r, g, b)
1059            }
1060            "rg" => {
1061                let b = self.pop_number(operands)?;
1062                let g = self.pop_number(operands)?;
1063                let r = self.pop_number(operands)?;
1064                ContentOperation::SetNonStrokingRGB(r, g, b)
1065            }
1066            "K" => {
1067                let k = self.pop_number(operands)?;
1068                let y = self.pop_number(operands)?;
1069                let m = self.pop_number(operands)?;
1070                let c = self.pop_number(operands)?;
1071                ContentOperation::SetStrokingCMYK(c, m, y, k)
1072            }
1073            "k" => {
1074                let k = self.pop_number(operands)?;
1075                let y = self.pop_number(operands)?;
1076                let m = self.pop_number(operands)?;
1077                let c = self.pop_number(operands)?;
1078                ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1079            }
1080
1081            // Shading operators
1082            "sh" => {
1083                let name = self.pop_name(operands)?;
1084                ContentOperation::ShadingFill(name)
1085            }
1086
1087            // XObject operators
1088            "Do" => {
1089                let name = self.pop_name(operands)?;
1090                ContentOperation::PaintXObject(name)
1091            }
1092
1093            // Marked content operators
1094            "BMC" => {
1095                let tag = self.pop_name(operands)?;
1096                ContentOperation::BeginMarkedContent(tag)
1097            }
1098            "BDC" => {
1099                let props = self.pop_dict_or_name(operands)?;
1100                let tag = self.pop_name(operands)?;
1101                ContentOperation::BeginMarkedContentWithProps(tag, props)
1102            }
1103            "EMC" => ContentOperation::EndMarkedContent,
1104            "MP" => {
1105                let tag = self.pop_name(operands)?;
1106                ContentOperation::DefineMarkedContentPoint(tag)
1107            }
1108            "DP" => {
1109                let props = self.pop_dict_or_name(operands)?;
1110                let tag = self.pop_name(operands)?;
1111                ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1112            }
1113
1114            // Compatibility operators
1115            "BX" => ContentOperation::BeginCompatibility,
1116            "EX" => ContentOperation::EndCompatibility,
1117
1118            // Inline images are handled specially
1119            "BI" => {
1120                operands.clear(); // Clear any remaining operands
1121                self.parse_inline_image()?
1122            }
1123
1124            _ => {
1125                return Err(ParseError::SyntaxError {
1126                    position: self.position,
1127                    message: format!("Unknown operator: {op}"),
1128                });
1129            }
1130        };
1131
1132        operands.clear(); // Clear operands after processing
1133        Ok(operator)
1134    }
1135
1136    // Helper methods for popping operands
1137    fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1138        match operands.pop() {
1139            Some(Token::Number(n)) => Ok(n),
1140            Some(Token::Integer(i)) => Ok(i as f32),
1141            _ => Err(ParseError::SyntaxError {
1142                position: self.position,
1143                message: "Expected number operand".to_string(),
1144            }),
1145        }
1146    }
1147
1148    fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1149        match operands.pop() {
1150            Some(Token::Integer(i)) => Ok(i),
1151            _ => Err(ParseError::SyntaxError {
1152                position: self.position,
1153                message: "Expected integer operand".to_string(),
1154            }),
1155        }
1156    }
1157
1158    fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1159        match operands.pop() {
1160            Some(Token::Name(n)) => Ok(n),
1161            _ => Err(ParseError::SyntaxError {
1162                position: self.position,
1163                message: "Expected name operand".to_string(),
1164            }),
1165        }
1166    }
1167
1168    fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1169        match operands.pop() {
1170            Some(Token::String(s)) => Ok(s),
1171            Some(Token::HexString(s)) => Ok(s),
1172            _ => Err(ParseError::SyntaxError {
1173                position: self.position,
1174                message: "Expected string operand".to_string(),
1175            }),
1176        }
1177    }
1178
1179    fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1180        // First check if we have an ArrayEnd at the top (which we should for a complete array)
1181        let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
1182        if has_array_end {
1183            operands.pop(); // Remove the ArrayEnd
1184        }
1185
1186        let mut array = Vec::new();
1187        let mut found_start = false;
1188
1189        // Pop tokens until we find ArrayStart
1190        while let Some(token) = operands.pop() {
1191            match token {
1192                Token::ArrayStart => {
1193                    found_start = true;
1194                    break;
1195                }
1196                Token::ArrayEnd => {
1197                    // Skip any additional ArrayEnd tokens (shouldn't happen in well-formed PDFs)
1198                    continue;
1199                }
1200                _ => array.push(token),
1201            }
1202        }
1203
1204        if !found_start {
1205            return Err(ParseError::SyntaxError {
1206                position: self.position,
1207                message: "Expected array".to_string(),
1208            });
1209        }
1210
1211        array.reverse(); // We collected in reverse order
1212        Ok(array)
1213    }
1214
1215    fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1216        if let Some(token) = operands.pop() {
1217            match token {
1218                Token::Name(name) => {
1219                    // Name token - this is a reference to properties in the resource dictionary
1220                    // For now, we'll store it as a special entry to indicate it's a resource reference
1221                    let mut props = HashMap::new();
1222                    props.insert("__resource_ref".to_string(), name);
1223                    Ok(props)
1224                }
1225                Token::DictStart => {
1226                    // Inline dictionary - parse key-value pairs
1227                    let mut props = HashMap::new();
1228
1229                    // Look for dictionary entries in remaining operands
1230                    while let Some(value_token) = operands.pop() {
1231                        if matches!(value_token, Token::DictEnd) {
1232                            break;
1233                        }
1234
1235                        // Expect key-value pairs
1236                        if let Token::Name(key) = value_token {
1237                            if let Some(value_token) = operands.pop() {
1238                                let value = match value_token {
1239                                    Token::Name(name) => name,
1240                                    Token::String(s) => String::from_utf8_lossy(&s).to_string(),
1241                                    Token::Integer(i) => i.to_string(),
1242                                    Token::Number(f) => f.to_string(),
1243                                    _ => continue, // Skip unsupported value types
1244                                };
1245                                props.insert(key, value);
1246                            }
1247                        }
1248                    }
1249
1250                    Ok(props)
1251                }
1252                _ => {
1253                    // Unexpected token type, treat as empty properties
1254                    Ok(HashMap::new())
1255                }
1256            }
1257        } else {
1258            // No operand available
1259            Err(ParseError::SyntaxError {
1260                position: 0,
1261                message: "Expected dictionary or name for marked content properties".to_string(),
1262            })
1263        }
1264    }
1265
1266    fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1267        let mut components = Vec::new();
1268
1269        // Pop all numeric values from the stack
1270        while let Some(token) = operands.last() {
1271            match token {
1272                Token::Number(n) => {
1273                    components.push(*n);
1274                    operands.pop();
1275                }
1276                Token::Integer(i) => {
1277                    components.push(*i as f32);
1278                    operands.pop();
1279                }
1280                _ => break,
1281            }
1282        }
1283
1284        components.reverse();
1285        Ok(components)
1286    }
1287
1288    fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1289        let mut elements = Vec::new();
1290
1291        for token in tokens {
1292            match token {
1293                Token::String(s) | Token::HexString(s) => {
1294                    elements.push(TextElement::Text(s));
1295                }
1296                Token::Number(n) => {
1297                    elements.push(TextElement::Spacing(n));
1298                }
1299                Token::Integer(i) => {
1300                    elements.push(TextElement::Spacing(i as f32));
1301                }
1302                _ => {
1303                    return Err(ParseError::SyntaxError {
1304                        position: self.position,
1305                        message: "Invalid element in text array".to_string(),
1306                    });
1307                }
1308            }
1309        }
1310
1311        Ok(elements)
1312    }
1313
1314    fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1315        let mut pattern = Vec::new();
1316
1317        for token in tokens {
1318            match token {
1319                Token::Number(n) => pattern.push(n),
1320                Token::Integer(i) => pattern.push(i as f32),
1321                _ => {
1322                    return Err(ParseError::SyntaxError {
1323                        position: self.position,
1324                        message: "Invalid element in dash array".to_string(),
1325                    });
1326                }
1327            }
1328        }
1329
1330        Ok(pattern)
1331    }
1332
1333    fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1334        // Parse inline image dictionary until we find ID
1335        let mut params = HashMap::new();
1336
1337        while self.position < self.tokens.len() {
1338            // Check if we've reached the ID operator
1339            if let Token::Operator(op) = &self.tokens[self.position] {
1340                if op == "ID" {
1341                    self.position += 1;
1342                    break;
1343                }
1344            }
1345
1346            // Parse key-value pairs for image parameters
1347            // Keys are abbreviated in inline images:
1348            // /W -> Width, /H -> Height, /CS -> ColorSpace, /BPC -> BitsPerComponent
1349            // /F -> Filter, /DP -> DecodeParms, /IM -> ImageMask, /I -> Interpolate
1350            if let Token::Name(key) = &self.tokens[self.position] {
1351                self.position += 1;
1352                if self.position >= self.tokens.len() {
1353                    break;
1354                }
1355
1356                // Parse the value
1357                let value = match &self.tokens[self.position] {
1358                    Token::Integer(n) => Object::Integer(*n as i64),
1359                    Token::Number(n) => Object::Real(*n as f64),
1360                    Token::Name(s) => Object::Name(expand_inline_name(s)),
1361                    Token::String(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1362                    Token::HexString(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1363                    _ => Object::Null,
1364                };
1365
1366                // Expand abbreviated keys to full names
1367                let full_key = expand_inline_key(key);
1368                params.insert(full_key, value);
1369                self.position += 1;
1370            } else {
1371                self.position += 1;
1372            }
1373        }
1374
1375        // Now we should be at the image data
1376        // Collect bytes until we find EI
1377        let mut data = Vec::new();
1378
1379        // For inline images, we need to read raw bytes until EI
1380        // This is tricky because EI could appear in the image data
1381        // We need to look for EI followed by a whitespace or operator
1382
1383        // Simplified approach: collect all tokens until we find EI operator
1384        while self.position < self.tokens.len() {
1385            if let Token::Operator(op) = &self.tokens[self.position] {
1386                if op == "EI" {
1387                    self.position += 1;
1388                    break;
1389                }
1390            }
1391
1392            // Convert token to bytes (simplified - real implementation would need raw byte access)
1393            match &self.tokens[self.position] {
1394                Token::String(bytes) => data.extend_from_slice(bytes),
1395                Token::HexString(bytes) => data.extend_from_slice(bytes),
1396                Token::Integer(n) => data.extend_from_slice(n.to_string().as_bytes()),
1397                Token::Number(n) => data.extend_from_slice(n.to_string().as_bytes()),
1398                Token::Name(s) => data.extend_from_slice(s.as_bytes()),
1399                Token::Operator(s) if s != "EI" => data.extend_from_slice(s.as_bytes()),
1400                _ => {}
1401            }
1402            self.position += 1;
1403        }
1404
1405        Ok(ContentOperation::InlineImage { params, data })
1406    }
1407}
1408
/// Maps an abbreviated inline-image dictionary key to its full name.
///
/// Keys that are already in long form, and keys that are not recognised,
/// are returned unchanged.
fn expand_inline_key(key: &str) -> String {
    let full_name = match key {
        "W" => "Width",
        "H" => "Height",
        "CS" => "ColorSpace",
        "BPC" => "BitsPerComponent",
        "F" => "Filter",
        "DP" => "DecodeParms",
        "IM" => "ImageMask",
        "I" => "Interpolate",
        "D" => "Decode",
        // Long-form and unknown keys pass through as-is.
        unabbreviated => unabbreviated,
    };
    full_name.to_string()
}
1425
/// Maps an abbreviated inline-image name value (color space or filter) to
/// its full name.
///
/// Names without a known abbreviation are returned unchanged.
fn expand_inline_name(name: &str) -> String {
    // (abbreviation, long form) pairs: color spaces first, then filters.
    const LONG_FORMS: &[(&str, &str)] = &[
        ("G", "DeviceGray"),
        ("RGB", "DeviceRGB"),
        ("CMYK", "DeviceCMYK"),
        ("I", "Indexed"),
        ("AHx", "ASCIIHexDecode"),
        ("A85", "ASCII85Decode"),
        ("LZW", "LZWDecode"),
        ("Fl", "FlateDecode"),
        ("RL", "RunLengthDecode"),
        ("DCT", "DCTDecode"),
        ("CCF", "CCITTFaxDecode"),
    ];

    LONG_FORMS
        .iter()
        .find(|(abbreviation, _)| *abbreviation == name)
        .map_or(name, |(_, long_form)| *long_form)
        .to_string()
}
1443
1444#[cfg(test)]
1445mod tests {
1446    use super::*;
1447
1448    #[test]
1449    fn test_tokenize_numbers() {
1450        let input = b"123 -45 3.14159 -0.5 .5";
1451        let mut tokenizer = ContentTokenizer::new(input);
1452
1453        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(123)));
1454        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(-45)));
1455        assert_eq!(
1456            tokenizer.next_token().unwrap(),
1457            Some(Token::Number(3.14159))
1458        );
1459        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1460        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1461        assert_eq!(tokenizer.next_token().unwrap(), None);
1462    }
1463
1464    #[test]
1465    fn test_tokenize_strings() {
1466        let input = b"(Hello World) (Hello\\nWorld) (Nested (paren))";
1467        let mut tokenizer = ContentTokenizer::new(input);
1468
1469        assert_eq!(
1470            tokenizer.next_token().unwrap(),
1471            Some(Token::String(b"Hello World".to_vec()))
1472        );
1473        assert_eq!(
1474            tokenizer.next_token().unwrap(),
1475            Some(Token::String(b"Hello\nWorld".to_vec()))
1476        );
1477        assert_eq!(
1478            tokenizer.next_token().unwrap(),
1479            Some(Token::String(b"Nested (paren)".to_vec()))
1480        );
1481    }
1482
1483    #[test]
1484    fn test_tokenize_hex_strings() {
1485        let input = b"<48656C6C6F> <48 65 6C 6C 6F>";
1486        let mut tokenizer = ContentTokenizer::new(input);
1487
1488        assert_eq!(
1489            tokenizer.next_token().unwrap(),
1490            Some(Token::HexString(b"Hello".to_vec()))
1491        );
1492        assert_eq!(
1493            tokenizer.next_token().unwrap(),
1494            Some(Token::HexString(b"Hello".to_vec()))
1495        );
1496    }
1497
1498    #[test]
1499    fn test_tokenize_names() {
1500        let input = b"/Name /Name#20with#20spaces /A#42C";
1501        let mut tokenizer = ContentTokenizer::new(input);
1502
1503        assert_eq!(
1504            tokenizer.next_token().unwrap(),
1505            Some(Token::Name("Name".to_string()))
1506        );
1507        assert_eq!(
1508            tokenizer.next_token().unwrap(),
1509            Some(Token::Name("Name with spaces".to_string()))
1510        );
1511        assert_eq!(
1512            tokenizer.next_token().unwrap(),
1513            Some(Token::Name("ABC".to_string()))
1514        );
1515    }
1516
1517    #[test]
1518    fn test_tokenize_operators() {
1519        let input = b"BT Tj ET q Q";
1520        let mut tokenizer = ContentTokenizer::new(input);
1521
1522        assert_eq!(
1523            tokenizer.next_token().unwrap(),
1524            Some(Token::Operator("BT".to_string()))
1525        );
1526        assert_eq!(
1527            tokenizer.next_token().unwrap(),
1528            Some(Token::Operator("Tj".to_string()))
1529        );
1530        assert_eq!(
1531            tokenizer.next_token().unwrap(),
1532            Some(Token::Operator("ET".to_string()))
1533        );
1534        assert_eq!(
1535            tokenizer.next_token().unwrap(),
1536            Some(Token::Operator("q".to_string()))
1537        );
1538        assert_eq!(
1539            tokenizer.next_token().unwrap(),
1540            Some(Token::Operator("Q".to_string()))
1541        );
1542    }
1543
1544    #[test]
1545    fn test_parse_text_operators() {
1546        let content = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
1547        let operators = ContentParser::parse(content).unwrap();
1548
1549        assert_eq!(operators.len(), 5);
1550        assert_eq!(operators[0], ContentOperation::BeginText);
1551        assert_eq!(
1552            operators[1],
1553            ContentOperation::SetFont("F1".to_string(), 12.0)
1554        );
1555        assert_eq!(operators[2], ContentOperation::MoveText(100.0, 200.0));
1556        assert_eq!(
1557            operators[3],
1558            ContentOperation::ShowText(b"Hello World".to_vec())
1559        );
1560        assert_eq!(operators[4], ContentOperation::EndText);
1561    }
1562
1563    #[test]
1564    fn test_parse_graphics_operators() {
1565        let content = b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q";
1566        let operators = ContentParser::parse(content).unwrap();
1567
1568        assert_eq!(operators.len(), 6);
1569        assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1570        assert_eq!(
1571            operators[1],
1572            ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1573        );
1574        assert_eq!(operators[2], ContentOperation::SetLineWidth(2.0));
1575        assert_eq!(
1576            operators[3],
1577            ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0)
1578        );
1579        assert_eq!(operators[4], ContentOperation::Stroke);
1580        assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
1581    }
1582
1583    #[test]
1584    fn test_parse_color_operators() {
1585        let content = b"0.5 g 1 0 0 rg 0 0 0 1 k";
1586        let operators = ContentParser::parse(content).unwrap();
1587
1588        assert_eq!(operators.len(), 3);
1589        assert_eq!(operators[0], ContentOperation::SetNonStrokingGray(0.5));
1590        assert_eq!(
1591            operators[1],
1592            ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0)
1593        );
1594        assert_eq!(
1595            operators[2],
1596            ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1597        );
1598    }
1599
1600    // Comprehensive tests for all ContentOperation variants
1601    mod comprehensive_tests {
1602        use super::*;
1603
1604        #[test]
1605        fn test_all_text_operators() {
1606            // Test basic text operators that work with current parser
1607            let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
1608            let operators = ContentParser::parse(content).unwrap();
1609
1610            assert_eq!(operators[0], ContentOperation::BeginText);
1611            assert_eq!(operators[1], ContentOperation::SetCharSpacing(5.0));
1612            assert_eq!(operators[2], ContentOperation::SetWordSpacing(10.0));
1613            assert_eq!(operators[3], ContentOperation::SetHorizontalScaling(120.0));
1614            assert_eq!(operators[4], ContentOperation::SetLeading(15.0));
1615            assert_eq!(
1616                operators[5],
1617                ContentOperation::SetFont("F1".to_string(), 12.0)
1618            );
1619            assert_eq!(operators[6], ContentOperation::SetTextRenderMode(1));
1620            assert_eq!(operators[7], ContentOperation::SetTextRise(5.0));
1621            assert_eq!(operators[8], ContentOperation::MoveText(100.0, 200.0));
1622            assert_eq!(
1623                operators[9],
1624                ContentOperation::MoveTextSetLeading(50.0, 150.0)
1625            );
1626            assert_eq!(operators[10], ContentOperation::NextLine);
1627            assert_eq!(operators[11], ContentOperation::ShowText(b"Hello".to_vec()));
1628            assert_eq!(operators[12], ContentOperation::EndText);
1629        }
1630
1631        #[test]
1632        fn test_all_graphics_state_operators() {
1633            // Test basic graphics state operators without arrays
1634            let content = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
1635            let operators = ContentParser::parse(content).unwrap();
1636
1637            assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1638            assert_eq!(operators[1], ContentOperation::RestoreGraphicsState);
1639            assert_eq!(
1640                operators[2],
1641                ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1642            );
1643            assert_eq!(operators[3], ContentOperation::SetLineWidth(2.0));
1644            assert_eq!(operators[4], ContentOperation::SetLineCap(1));
1645            assert_eq!(operators[5], ContentOperation::SetLineJoin(2));
1646            assert_eq!(operators[6], ContentOperation::SetMiterLimit(10.0));
1647            assert_eq!(
1648                operators[7],
1649                ContentOperation::SetGraphicsStateParams("GS1".to_string())
1650            );
1651            assert_eq!(operators[8], ContentOperation::SetFlatness(0.5));
1652            assert_eq!(
1653                operators[9],
1654                ContentOperation::SetIntent("Perceptual".to_string())
1655            );
1656        }
1657
1658        #[test]
1659        fn test_all_path_construction_operators() {
1660            let content = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
1661            let operators = ContentParser::parse(content).unwrap();
1662
1663            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
1664            assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
1665            assert_eq!(
1666                operators[2],
1667                ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0)
1668            );
1669            assert_eq!(
1670                operators[3],
1671                ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0)
1672            );
1673            assert_eq!(
1674                operators[4],
1675                ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0)
1676            );
1677            assert_eq!(operators[5], ContentOperation::ClosePath);
1678            assert_eq!(
1679                operators[6],
1680                ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0)
1681            );
1682        }
1683
1684        #[test]
1685        fn test_all_path_painting_operators() {
1686            let content = b"S s f F f* B B* b b* n W W*";
1687            let operators = ContentParser::parse(content).unwrap();
1688
1689            assert_eq!(operators[0], ContentOperation::Stroke);
1690            assert_eq!(operators[1], ContentOperation::CloseStroke);
1691            assert_eq!(operators[2], ContentOperation::Fill);
1692            assert_eq!(operators[3], ContentOperation::Fill); // F is alias for f
1693            assert_eq!(operators[4], ContentOperation::FillEvenOdd);
1694            assert_eq!(operators[5], ContentOperation::FillStroke);
1695            assert_eq!(operators[6], ContentOperation::FillStrokeEvenOdd);
1696            assert_eq!(operators[7], ContentOperation::CloseFillStroke);
1697            assert_eq!(operators[8], ContentOperation::CloseFillStrokeEvenOdd);
1698            assert_eq!(operators[9], ContentOperation::EndPath);
1699            assert_eq!(operators[10], ContentOperation::Clip);
1700            assert_eq!(operators[11], ContentOperation::ClipEvenOdd);
1701        }
1702
1703        #[test]
1704        fn test_all_color_operators() {
1705            // Test basic color operators that work with current parser
1706            let content = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
1707            let operators = ContentParser::parse(content).unwrap();
1708
1709            assert_eq!(
1710                operators[0],
1711                ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string())
1712            );
1713            assert_eq!(
1714                operators[1],
1715                ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string())
1716            );
1717            assert_eq!(operators[2], ContentOperation::SetStrokingGray(0.7));
1718            assert_eq!(operators[3], ContentOperation::SetNonStrokingGray(0.4));
1719            assert_eq!(
1720                operators[4],
1721                ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0)
1722            );
1723            assert_eq!(
1724                operators[5],
1725                ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0)
1726            );
1727            assert_eq!(
1728                operators[6],
1729                ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1730            );
1731            assert_eq!(
1732                operators[7],
1733                ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5)
1734            );
1735            assert_eq!(
1736                operators[8],
1737                ContentOperation::ShadingFill("Shade1".to_string())
1738            );
1739        }
1740
1741        #[test]
1742        fn test_xobject_and_marked_content_operators() {
1743            // Test basic XObject and marked content operators
1744            let content = b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX";
1745            let operators = ContentParser::parse(content).unwrap();
1746
1747            assert_eq!(
1748                operators[0],
1749                ContentOperation::PaintXObject("Image1".to_string())
1750            );
1751            assert_eq!(
1752                operators[1],
1753                ContentOperation::BeginMarkedContent("MC1".to_string())
1754            );
1755            assert_eq!(operators[2], ContentOperation::EndMarkedContent);
1756            assert_eq!(
1757                operators[3],
1758                ContentOperation::DefineMarkedContentPoint("MP1".to_string())
1759            );
1760            assert_eq!(operators[4], ContentOperation::BeginCompatibility);
1761            assert_eq!(operators[5], ContentOperation::EndCompatibility);
1762        }
1763
1764        #[test]
1765        fn test_complex_content_stream() {
1766            let content = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
1767            let operators = ContentParser::parse(content).unwrap();
1768
1769            assert_eq!(operators.len(), 8);
1770            assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1771            assert_eq!(
1772                operators[1],
1773                ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0)
1774            );
1775            assert_eq!(operators[2], ContentOperation::BeginText);
1776            assert_eq!(
1777                operators[3],
1778                ContentOperation::SetFont("F1".to_string(), 12.0)
1779            );
1780            assert_eq!(operators[4], ContentOperation::MoveText(0.0, 0.0));
1781            assert_eq!(
1782                operators[5],
1783                ContentOperation::ShowText(b"Complex".to_vec())
1784            );
1785            assert_eq!(operators[6], ContentOperation::EndText);
1786            assert_eq!(operators[7], ContentOperation::RestoreGraphicsState);
1787        }
1788
1789        #[test]
1790        fn test_tokenizer_whitespace_handling() {
1791            let input = b"  \t\n\r  BT  \t\n  /F1   12.5  \t Tf  \n\r  ET  ";
1792            let mut tokenizer = ContentTokenizer::new(input);
1793
1794            assert_eq!(
1795                tokenizer.next_token().unwrap(),
1796                Some(Token::Operator("BT".to_string()))
1797            );
1798            assert_eq!(
1799                tokenizer.next_token().unwrap(),
1800                Some(Token::Name("F1".to_string()))
1801            );
1802            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(12.5)));
1803            assert_eq!(
1804                tokenizer.next_token().unwrap(),
1805                Some(Token::Operator("Tf".to_string()))
1806            );
1807            assert_eq!(
1808                tokenizer.next_token().unwrap(),
1809                Some(Token::Operator("ET".to_string()))
1810            );
1811            assert_eq!(tokenizer.next_token().unwrap(), None);
1812        }
1813
1814        #[test]
1815        fn test_tokenizer_edge_cases() {
1816            // Test basic number formats that are actually supported
1817            let input = b"0 .5 -.5 +.5 123. .123 1.23 -1.23";
1818            let mut tokenizer = ContentTokenizer::new(input);
1819
1820            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(0)));
1821            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1822            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1823            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1824            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(123.0)));
1825            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.123)));
1826            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(1.23)));
1827            assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-1.23)));
1828        }
1829
1830        #[test]
1831        fn test_string_parsing_edge_cases() {
1832            let input = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
1833            let mut tokenizer = ContentTokenizer::new(input);
1834
1835            assert_eq!(
1836                tokenizer.next_token().unwrap(),
1837                Some(Token::String(b"Simple".to_vec()))
1838            );
1839            assert_eq!(
1840                tokenizer.next_token().unwrap(),
1841                Some(Token::String(b"With\\backslash".to_vec()))
1842            );
1843            assert_eq!(
1844                tokenizer.next_token().unwrap(),
1845                Some(Token::String(b"With)paren".to_vec()))
1846            );
1847            assert_eq!(
1848                tokenizer.next_token().unwrap(),
1849                Some(Token::String(b"With\newline".to_vec()))
1850            );
1851            assert_eq!(
1852                tokenizer.next_token().unwrap(),
1853                Some(Token::String(b"With\ttab".to_vec()))
1854            );
1855            assert_eq!(
1856                tokenizer.next_token().unwrap(),
1857                Some(Token::String(b"With\rcarriage".to_vec()))
1858            );
1859            assert_eq!(
1860                tokenizer.next_token().unwrap(),
1861                Some(Token::String(b"With\x08backspace".to_vec()))
1862            );
1863            assert_eq!(
1864                tokenizer.next_token().unwrap(),
1865                Some(Token::String(b"With\x0Cformfeed".to_vec()))
1866            );
1867            assert_eq!(
1868                tokenizer.next_token().unwrap(),
1869                Some(Token::String(b"With(leftparen".to_vec()))
1870            );
1871            assert_eq!(
1872                tokenizer.next_token().unwrap(),
1873                Some(Token::String(b"With)rightparen".to_vec()))
1874            );
1875        }
1876
1877        #[test]
1878        fn test_hex_string_parsing() {
1879            let input = b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>";
1880            let mut tokenizer = ContentTokenizer::new(input);
1881
1882            assert_eq!(
1883                tokenizer.next_token().unwrap(),
1884                Some(Token::HexString(b"Hello".to_vec()))
1885            );
1886            assert_eq!(
1887                tokenizer.next_token().unwrap(),
1888                Some(Token::HexString(b"Hello".to_vec()))
1889            );
1890            assert_eq!(
1891                tokenizer.next_token().unwrap(),
1892                Some(Token::HexString(b"HelloW".to_vec()))
1893            );
1894            assert_eq!(
1895                tokenizer.next_token().unwrap(),
1896                Some(Token::HexString(b"Hello\x50".to_vec()))
1897            );
1898        }
1899
1900        #[test]
1901        fn test_name_parsing_edge_cases() {
1902            let input = b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
1903            let mut tokenizer = ContentTokenizer::new(input);
1904
1905            assert_eq!(
1906                tokenizer.next_token().unwrap(),
1907                Some(Token::Name("Name".to_string()))
1908            );
1909            assert_eq!(
1910                tokenizer.next_token().unwrap(),
1911                Some(Token::Name("Name with spaces".to_string()))
1912            );
1913            assert_eq!(
1914                tokenizer.next_token().unwrap(),
1915                Some(Token::Name("Name#with#hash".to_string()))
1916            );
1917            assert_eq!(
1918                tokenizer.next_token().unwrap(),
1919                Some(Token::Name("Name/with/slash".to_string()))
1920            );
1921            assert_eq!(
1922                tokenizer.next_token().unwrap(),
1923                Some(Token::Name("EmptyName".to_string()))
1924            );
1925        }
1926
1927        #[test]
1928        fn test_operator_parsing_edge_cases() {
1929            let content = b"q q q Q Q Q BT BT ET ET";
1930            let operators = ContentParser::parse(content).unwrap();
1931
1932            assert_eq!(operators.len(), 10);
1933            assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1934            assert_eq!(operators[1], ContentOperation::SaveGraphicsState);
1935            assert_eq!(operators[2], ContentOperation::SaveGraphicsState);
1936            assert_eq!(operators[3], ContentOperation::RestoreGraphicsState);
1937            assert_eq!(operators[4], ContentOperation::RestoreGraphicsState);
1938            assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
1939            assert_eq!(operators[6], ContentOperation::BeginText);
1940            assert_eq!(operators[7], ContentOperation::BeginText);
1941            assert_eq!(operators[8], ContentOperation::EndText);
1942            assert_eq!(operators[9], ContentOperation::EndText);
1943        }
1944
1945        #[test]
1946        fn test_error_handling_insufficient_operands() {
1947            let content = b"100 Td"; // Missing y coordinate
1948            let result = ContentParser::parse(content);
1949            assert!(result.is_err());
1950        }
1951
1952        #[test]
1953        fn test_error_handling_invalid_operator() {
1954            let content = b"100 200 INVALID";
1955            let result = ContentParser::parse(content);
1956            assert!(result.is_err());
1957        }
1958
1959        #[test]
1960        fn test_error_handling_malformed_string() {
1961            // Test that the tokenizer handles malformed strings appropriately
1962            let input = b"(Unclosed string";
1963            let mut tokenizer = ContentTokenizer::new(input);
1964            let result = tokenizer.next_token();
1965            // The current implementation may not detect this as an error
1966            // so we'll just test that we get some result
1967            assert!(result.is_ok() || result.is_err());
1968        }
1969
1970        #[test]
1971        fn test_error_handling_malformed_hex_string() {
1972            let input = b"<48656C6C6G>";
1973            let mut tokenizer = ContentTokenizer::new(input);
1974            let result = tokenizer.next_token();
1975            assert!(result.is_err());
1976        }
1977
1978        #[test]
1979        fn test_error_handling_malformed_name() {
1980            let input = b"/Name#GG";
1981            let mut tokenizer = ContentTokenizer::new(input);
1982            let result = tokenizer.next_token();
1983            assert!(result.is_err());
1984        }
1985
1986        #[test]
1987        fn test_empty_content_stream() {
1988            let content = b"";
1989            let operators = ContentParser::parse(content).unwrap();
1990            assert_eq!(operators.len(), 0);
1991        }
1992
1993        #[test]
1994        fn test_whitespace_only_content_stream() {
1995            let content = b"   \t\n\r   ";
1996            let operators = ContentParser::parse(content).unwrap();
1997            assert_eq!(operators.len(), 0);
1998        }
1999
2000        #[test]
2001        fn test_mixed_integer_and_real_operands() {
2002            // Test with simple operands that work with current parser
2003            let content = b"100 200 m 150 200 l";
2004            let operators = ContentParser::parse(content).unwrap();
2005
2006            assert_eq!(operators.len(), 2);
2007            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2008            assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
2009        }
2010
2011        #[test]
2012        fn test_negative_operands() {
2013            let content = b"-100 -200 Td -50.5 -75.2 TD";
2014            let operators = ContentParser::parse(content).unwrap();
2015
2016            assert_eq!(operators.len(), 2);
2017            assert_eq!(operators[0], ContentOperation::MoveText(-100.0, -200.0));
2018            assert_eq!(
2019                operators[1],
2020                ContentOperation::MoveTextSetLeading(-50.5, -75.2)
2021            );
2022        }
2023
2024        #[test]
2025        fn test_large_numbers() {
2026            let content = b"999999.999999 -999999.999999 m";
2027            let operators = ContentParser::parse(content).unwrap();
2028
2029            assert_eq!(operators.len(), 1);
2030            assert_eq!(
2031                operators[0],
2032                ContentOperation::MoveTo(999999.999999, -999999.999999)
2033            );
2034        }
2035
2036        #[test]
2037        fn test_scientific_notation() {
2038            // Test with simple decimal numbers since scientific notation isn't implemented
2039            let content = b"123.45 -456.78 m";
2040            let operators = ContentParser::parse(content).unwrap();
2041
2042            assert_eq!(operators.len(), 1);
2043            assert_eq!(operators[0], ContentOperation::MoveTo(123.45, -456.78));
2044        }
2045
2046        #[test]
2047        fn test_show_text_array_complex() {
2048            // Test simple text array without complex syntax
2049            let content = b"(Hello) TJ";
2050            let result = ContentParser::parse(content);
2051            // This should fail since TJ expects array, but test the error handling
2052            assert!(result.is_err());
2053        }
2054
2055        #[test]
2056        fn test_dash_pattern_empty() {
2057            // Test simple dash pattern without array syntax
2058            let content = b"0 d";
2059            let result = ContentParser::parse(content);
2060            // This should fail since dash pattern needs array, but test the error handling
2061            assert!(result.is_err());
2062        }
2063
2064        #[test]
2065        fn test_dash_pattern_complex() {
2066            // Test simple dash pattern without complex array syntax
2067            let content = b"2.5 d";
2068            let result = ContentParser::parse(content);
2069            // This should fail since dash pattern needs array, but test the error handling
2070            assert!(result.is_err());
2071        }
2072
2073        #[test]
2074        fn test_pop_array_removes_array_end() {
2075            // Test that pop_array correctly handles ArrayEnd tokens
2076            let parser = ContentParser::new(b"");
2077
2078            // Test normal array: [1 2 3]
2079            let mut operands = vec![
2080                Token::ArrayStart,
2081                Token::Integer(1),
2082                Token::Integer(2),
2083                Token::Integer(3),
2084                Token::ArrayEnd,
2085            ];
2086            let result = parser.pop_array(&mut operands).unwrap();
2087            assert_eq!(result.len(), 3);
2088            assert!(operands.is_empty());
2089
2090            // Test array without ArrayEnd (backwards compatibility)
2091            let mut operands = vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)];
2092            let result = parser.pop_array(&mut operands).unwrap();
2093            assert_eq!(result.len(), 2);
2094            assert!(operands.is_empty());
2095        }
2096
2097        #[test]
2098        fn test_dash_array_parsing_valid() {
2099            // Test that parser correctly parses valid dash arrays
2100            let parser = ContentParser::new(b"");
2101
2102            // Test with valid numbers only
2103            let valid_tokens = vec![Token::Number(3.0), Token::Integer(2)];
2104            let result = parser.parse_dash_array(valid_tokens).unwrap();
2105            assert_eq!(result, vec![3.0, 2.0]);
2106
2107            // Test empty dash array
2108            let empty_tokens = vec![];
2109            let result = parser.parse_dash_array(empty_tokens).unwrap();
2110            let expected: Vec<f32> = vec![];
2111            assert_eq!(result, expected);
2112        }
2113
2114        #[test]
2115        fn test_text_array_parsing_valid() {
2116            // Test that parser correctly parses valid text arrays
2117            let parser = ContentParser::new(b"");
2118
2119            // Test with valid elements only
2120            let valid_tokens = vec![
2121                Token::String(b"Hello".to_vec()),
2122                Token::Number(-100.0),
2123                Token::String(b"World".to_vec()),
2124            ];
2125            let result = parser.parse_text_array(valid_tokens).unwrap();
2126            assert_eq!(result.len(), 3);
2127        }
2128
2129        #[test]
2130        fn test_inline_image_handling() {
2131            let content = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
2132            let operators = ContentParser::parse(content).unwrap();
2133
2134            assert_eq!(operators.len(), 1);
2135            match &operators[0] {
2136                ContentOperation::InlineImage { params, data: _ } => {
2137                    // Check parsed parameters
2138                    assert_eq!(params.get("Width"), Some(&Object::Integer(100)));
2139                    assert_eq!(params.get("Height"), Some(&Object::Integer(100)));
2140                    assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(8)));
2141                    assert_eq!(
2142                        params.get("ColorSpace"),
2143                        Some(&Object::Name("DeviceRGB".to_string()))
2144                    );
2145                    // Data field is not captured, just verify params
2146                }
2147                _ => panic!("Expected InlineImage operation"),
2148            }
2149        }
2150
2151        #[test]
2152        fn test_inline_image_with_filter() {
2153            let content = b"BI /W 50 /H 50 /CS /G /BPC 1 /F /AHx ID 00FF00FF EI";
2154            let operators = ContentParser::parse(content).unwrap();
2155
2156            assert_eq!(operators.len(), 1);
2157            match &operators[0] {
2158                ContentOperation::InlineImage { params, data: _ } => {
2159                    assert_eq!(params.get("Width"), Some(&Object::Integer(50)));
2160                    assert_eq!(params.get("Height"), Some(&Object::Integer(50)));
2161                    assert_eq!(
2162                        params.get("ColorSpace"),
2163                        Some(&Object::Name("DeviceGray".to_string()))
2164                    );
2165                    assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(1)));
2166                    assert_eq!(
2167                        params.get("Filter"),
2168                        Some(&Object::Name("ASCIIHexDecode".to_string()))
2169                    );
2170                }
2171                _ => panic!("Expected InlineImage operation"),
2172            }
2173        }
2174
2175        #[test]
2176        fn test_content_parser_performance() {
2177            let mut content = Vec::new();
2178            for i in 0..1000 {
2179                content.extend_from_slice(format!("{} {} m ", i, i + 1).as_bytes());
2180            }
2181
2182            let start = std::time::Instant::now();
2183            let operators = ContentParser::parse(&content).unwrap();
2184            let duration = start.elapsed();
2185
2186            assert_eq!(operators.len(), 1000);
2187            assert!(duration.as_millis() < 100); // Should parse 1000 operators in under 100ms
2188        }
2189
2190        #[test]
2191        fn test_tokenizer_performance() {
2192            let mut input = Vec::new();
2193            for i in 0..1000 {
2194                input.extend_from_slice(format!("{} {} ", i, i + 1).as_bytes());
2195            }
2196
2197            let start = std::time::Instant::now();
2198            let mut tokenizer = ContentTokenizer::new(&input);
2199            let mut count = 0;
2200            while tokenizer.next_token().unwrap().is_some() {
2201                count += 1;
2202            }
2203            let duration = start.elapsed();
2204
2205            assert_eq!(count, 2000); // 1000 pairs of numbers
2206            assert!(duration.as_millis() < 50); // Should tokenize 2000 tokens in under 50ms
2207        }
2208
2209        #[test]
2210        fn test_memory_usage_large_content() {
2211            let mut content = Vec::new();
2212            for i in 0..10000 {
2213                content.extend_from_slice(
2214                    format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5)
2215                        .as_bytes(),
2216                );
2217            }
2218
2219            let operators = ContentParser::parse(&content).unwrap();
2220            assert_eq!(operators.len(), 10000);
2221
2222            // Verify all operations are CurveTo
2223            for op in operators {
2224                matches!(op, ContentOperation::CurveTo(_, _, _, _, _, _));
2225            }
2226        }
2227
2228        #[test]
2229        fn test_concurrent_parsing() {
2230            use std::sync::Arc;
2231            use std::thread;
2232
2233            let content = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());
2234            let handles: Vec<_> = (0..10)
2235                .map(|_| {
2236                    let content_clone = content.clone();
2237                    thread::spawn(move || ContentParser::parse(&content_clone).unwrap())
2238                })
2239                .collect();
2240
2241            for handle in handles {
2242                let operators = handle.join().unwrap();
2243                assert_eq!(operators.len(), 5);
2244                assert_eq!(operators[0], ContentOperation::BeginText);
2245                assert_eq!(operators[4], ContentOperation::EndText);
2246            }
2247        }
2248
2249        // ========== NEW COMPREHENSIVE TESTS ==========
2250
2251        #[test]
2252        fn test_tokenizer_hex_string_edge_cases() {
2253            let mut tokenizer = ContentTokenizer::new(b"<>");
2254            let token = tokenizer.next_token().unwrap().unwrap();
2255            match token {
2256                Token::HexString(data) => assert!(data.is_empty()),
2257                _ => panic!("Expected empty hex string"),
2258            }
2259
2260            // Odd number of hex digits
2261            let mut tokenizer = ContentTokenizer::new(b"<123>");
2262            let token = tokenizer.next_token().unwrap().unwrap();
2263            match token {
2264                Token::HexString(data) => assert_eq!(data, vec![0x12, 0x30]),
2265                _ => panic!("Expected hex string with odd digits"),
2266            }
2267
2268            // Hex string with whitespace
2269            let mut tokenizer = ContentTokenizer::new(b"<12 34\t56\n78>");
2270            let token = tokenizer.next_token().unwrap().unwrap();
2271            match token {
2272                Token::HexString(data) => assert_eq!(data, vec![0x12, 0x34, 0x56, 0x78]),
2273                _ => panic!("Expected hex string with whitespace"),
2274            }
2275        }
2276
2277        #[test]
2278        fn test_tokenizer_literal_string_escape_sequences() {
2279            // Test all standard escape sequences
2280            let mut tokenizer = ContentTokenizer::new(b"(\\n\\r\\t\\b\\f\\(\\)\\\\)");
2281            let token = tokenizer.next_token().unwrap().unwrap();
2282            match token {
2283                Token::String(data) => {
2284                    assert_eq!(
2285                        data,
2286                        vec![b'\n', b'\r', b'\t', 0x08, 0x0C, b'(', b')', b'\\']
2287                    );
2288                }
2289                _ => panic!("Expected string with escapes"),
2290            }
2291
2292            // Test octal escape sequences
2293            let mut tokenizer = ContentTokenizer::new(b"(\\101\\040\\377)");
2294            let token = tokenizer.next_token().unwrap().unwrap();
2295            match token {
2296                Token::String(data) => assert_eq!(data, vec![b'A', b' ', 255]),
2297                _ => panic!("Expected string with octal escapes"),
2298            }
2299        }
2300
2301        #[test]
2302        fn test_tokenizer_nested_parentheses() {
2303            let mut tokenizer = ContentTokenizer::new(b"(outer (inner) text)");
2304            let token = tokenizer.next_token().unwrap().unwrap();
2305            match token {
2306                Token::String(data) => {
2307                    assert_eq!(data, b"outer (inner) text");
2308                }
2309                _ => panic!("Expected string with nested parentheses"),
2310            }
2311
2312            // Multiple levels of nesting
2313            let mut tokenizer = ContentTokenizer::new(b"(level1 (level2 (level3) back2) back1)");
2314            let token = tokenizer.next_token().unwrap().unwrap();
2315            match token {
2316                Token::String(data) => {
2317                    assert_eq!(data, b"level1 (level2 (level3) back2) back1");
2318                }
2319                _ => panic!("Expected string with deep nesting"),
2320            }
2321        }
2322
2323        #[test]
2324        fn test_tokenizer_name_hex_escapes() {
2325            let mut tokenizer = ContentTokenizer::new(b"/Name#20With#20Spaces");
2326            let token = tokenizer.next_token().unwrap().unwrap();
2327            match token {
2328                Token::Name(name) => assert_eq!(name, "Name With Spaces"),
2329                _ => panic!("Expected name with hex escapes"),
2330            }
2331
2332            // Test various special characters
2333            let mut tokenizer = ContentTokenizer::new(b"/Special#2F#28#29#3C#3E");
2334            let token = tokenizer.next_token().unwrap().unwrap();
2335            match token {
2336                Token::Name(name) => assert_eq!(name, "Special/()<>"),
2337                _ => panic!("Expected name with special character escapes"),
2338            }
2339        }
2340
2341        #[test]
2342        fn test_tokenizer_number_edge_cases() {
2343            // Very large integers
2344            let mut tokenizer = ContentTokenizer::new(b"2147483647");
2345            let token = tokenizer.next_token().unwrap().unwrap();
2346            match token {
2347                Token::Integer(n) => assert_eq!(n, 2147483647),
2348                _ => panic!("Expected large integer"),
2349            }
2350
2351            // Very small numbers
2352            let mut tokenizer = ContentTokenizer::new(b"0.00001");
2353            let token = tokenizer.next_token().unwrap().unwrap();
2354            match token {
2355                Token::Number(n) => assert!((n - 0.00001).abs() < f32::EPSILON),
2356                _ => panic!("Expected small float"),
2357            }
2358
2359            // Numbers starting with dot
2360            let mut tokenizer = ContentTokenizer::new(b".5");
2361            let token = tokenizer.next_token().unwrap().unwrap();
2362            match token {
2363                Token::Number(n) => assert!((n - 0.5).abs() < f32::EPSILON),
2364                _ => panic!("Expected float starting with dot"),
2365            }
2366        }
2367
2368        #[test]
2369        fn test_parser_complex_path_operations() {
2370            let content = b"100 200 m 150 200 l 150 250 l 100 250 l h f";
2371            let operators = ContentParser::parse(content).unwrap();
2372
2373            assert_eq!(operators.len(), 6);
2374            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2375            assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
2376            assert_eq!(operators[2], ContentOperation::LineTo(150.0, 250.0));
2377            assert_eq!(operators[3], ContentOperation::LineTo(100.0, 250.0));
2378            assert_eq!(operators[4], ContentOperation::ClosePath);
2379            assert_eq!(operators[5], ContentOperation::Fill);
2380        }
2381
/// The `c` operator takes six operands; they must reach `CurveTo` in the
/// order they are written in the stream: `x1 y1 x2 y2 x3 y3`.
#[test]
fn test_parser_bezier_curves() {
    let content = b"100 100 150 50 200 150 c";
    let operators = ContentParser::parse(content).unwrap();

    assert_eq!(operators.len(), 1);
    // Compare the whole operation rather than range-checking individual
    // fields: a loose range check cannot detect swapped or shifted operands.
    assert_eq!(
        operators[0],
        ContentOperation::CurveTo(100.0, 100.0, 150.0, 50.0, 200.0, 150.0)
    );
}
2403
/// Feeds `g`, `rg`, `k`, `cs` and `sc` through the parser. Five operations
/// are expected; only the gray and RGB results are inspected in detail.
#[test]
fn test_parser_color_operations() {
    let operators =
        ContentParser::parse(b"0.5 g 1 0 0 rg 0 1 0 1 k /DeviceRGB cs 0.2 0.4 0.6 sc").unwrap();

    assert_eq!(operators.len(), 5);

    if let ContentOperation::SetNonStrokingGray(gray) = &operators[0] {
        assert_eq!(*gray, 0.5);
    } else {
        panic!("Expected SetNonStrokingGray");
    }

    if let ContentOperation::SetNonStrokingRGB(r, g, b) = &operators[1] {
        assert_eq!((*r, *g, *b), (1.0, 0.0, 0.0));
    } else {
        panic!("Expected SetNonStrokingRGB");
    }
}
2421
/// A text object containing `Tm`, two `TL` and two `'` operators must yield
/// seven operations, bracketed by `BeginText` / `EndText`, with the text
/// matrix operands preserved in order.
#[test]
fn test_parser_text_positioning_advanced() {
    let stream = b"BT 1 0 0 1 100 200 Tm 0 TL 10 TL (Line 1) ' (Line 2) ' ET";
    let operators = ContentParser::parse(stream).unwrap();

    assert_eq!(operators.len(), 7);
    assert_eq!(operators[0], ContentOperation::BeginText);

    if let ContentOperation::SetTextMatrix(a, b, c, d, e, f) = &operators[1] {
        assert_eq!((*a, *b, *c, *d, *e, *f), (1.0, 0.0, 0.0, 1.0, 100.0, 200.0));
    } else {
        panic!("Expected SetTextMatrix");
    }

    assert_eq!(operators[6], ContentOperation::EndText);
}
2437
/// `q ... Q` wrapping a `cm` and four line-style operators must yield seven
/// operations; the save/restore pair and the matrix operands are checked.
#[test]
fn test_parser_graphics_state_operations() {
    let stream = b"q 2 0 0 2 100 100 cm 5 w 1 J 2 j 10 M Q";
    let operators = ContentParser::parse(stream).unwrap();

    assert_eq!(operators.len(), 7);
    assert_eq!(operators[0], ContentOperation::SaveGraphicsState);

    if let ContentOperation::SetTransformMatrix(a, b, c, d, e, f) = &operators[1] {
        assert_eq!((*a, *b, *c, *d, *e, *f), (2.0, 0.0, 0.0, 2.0, 100.0, 100.0));
    } else {
        panic!("Expected SetTransformMatrix");
    }

    assert_eq!(operators[6], ContentOperation::RestoreGraphicsState);
}
2453
/// Three consecutive `Do` operators must each produce a `PaintXObject`
/// carrying the resource name without its leading slash.
#[test]
fn test_parser_xobject_operations() {
    let operators = ContentParser::parse(b"/Image1 Do /Form2 Do /Pattern3 Do").unwrap();
    let expected_names = ["Image1", "Form2", "Pattern3"];

    assert_eq!(operators.len(), 3);
    for (operation, expected_name) in operators.iter().zip(expected_names.iter()) {
        if let ContentOperation::PaintXObject(name) = operation {
            assert_eq!(name, expected_name);
        } else {
            panic!("Expected PaintXObject");
        }
    }
}
2467
/// A `BMC` / `EMC` pair around a `Tj` must yield three operations, with the
/// tag name exposed on `BeginMarkedContent`.
#[test]
fn test_parser_marked_content_operations() {
    let operators = ContentParser::parse(b"/P BMC (Tagged content) Tj EMC").unwrap();

    assert_eq!(operators.len(), 3);

    if let ContentOperation::BeginMarkedContent(tag) = &operators[0] {
        assert_eq!(tag, "P");
    } else {
        panic!("Expected BeginMarkedContent");
    }

    assert_eq!(operators[2], ContentOperation::EndMarkedContent);
}
2480
/// Checks which malformed inputs are rejected and which are tolerated.
#[test]
fn test_parser_error_handling_invalid_operators() {
    // `m` with no operands available must be reported as an error.
    assert!(ContentParser::parse(b"m").is_err());

    // A hex string that never reaches its closing `>` must be an error.
    assert!(ContentParser::parse(b"<ABC DEF BT").is_err());

    // Operands that no operator ever consumes are accepted: nothing is
    // attempted with them, so there is nothing to fail.
    assert!(ContentParser::parse(b"100 200 300").is_ok());
}
2498
/// Arbitrary runs of spaces, tabs, CR and LF around tokens must not change
/// the parsed result.
#[test]
fn test_parser_whitespace_tolerance() {
    let operators = ContentParser::parse(b"  \n\t  100   \r\n  200  \t m  \n").unwrap();

    assert_eq!(operators.len(), 1);
    assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
}
2507
/// `%` comments, both between operands and trailing at end of input, must
/// be skipped so that only the `m` operation remains.
#[test]
fn test_tokenizer_comment_handling() {
    let stream = b"100 % This is a comment\n200 m % Another comment";
    let operators = ContentParser::parse(stream).unwrap();

    assert_eq!(operators.len(), 1);
    assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
}
2516
/// A comment that contains a raw `0xFF` byte must be skipped like any other
/// comment, leaving the surrounding `m` and `l` operations intact.
#[test]
fn test_parser_stream_with_binary_data() {
    let stream = b"100 200 m % Comment with \xFF binary\n150 250 l";
    let operators = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
    ];

    assert_eq!(operators.len(), 2);
    assert_eq!(operators[0], expected[0]);
    assert_eq!(operators[1], expected[1]);
}
2527
/// Despite the name, this only exercises plain `m` / `l` operators; no
/// array token appears in the input.
#[test]
fn test_tokenizer_array_parsing() {
    let operators = ContentParser::parse(b"100 200 m 150 250 l").unwrap();

    assert_eq!(operators.len(), 2);

    let first = &operators[0];
    let second = &operators[1];
    assert_eq!(*first, ContentOperation::MoveTo(100.0, 200.0));
    assert_eq!(*second, ContentOperation::LineTo(150.0, 250.0));
}
2538
/// Two `re` operators must each produce a `Rectangle` whose fields follow
/// the operand order `x y width height`.
#[test]
fn test_parser_rectangle_operations() {
    let operators = ContentParser::parse(b"10 20 100 50 re 0 0 200 300 re").unwrap();

    assert_eq!(operators.len(), 2);

    if let ContentOperation::Rectangle(x, y, width, height) = &operators[0] {
        assert_eq!((*x, *y, *width, *height), (10.0, 20.0, 100.0, 50.0));
    } else {
        panic!("Expected Rectangle operation");
    }

    if let ContentOperation::Rectangle(x, y, width, height) = &operators[1] {
        assert_eq!((*x, *y, *width, *height), (0.0, 0.0, 200.0, 300.0));
    } else {
        panic!("Expected Rectangle operation");
    }
}
2558
/// Two `re` + clip + `n` sequences: the first uses `W`, the second `W*`.
/// Only the clip and end-path operations (indices 1, 2, 4, 5) are checked.
#[test]
fn test_parser_clipping_operations() {
    let stream = b"100 100 50 50 re W n 200 200 75 75 re W* n";
    let operators = ContentParser::parse(stream).unwrap();

    assert_eq!(operators.len(), 6);

    // First rectangle: nonzero-winding clip.
    assert_eq!(operators[1], ContentOperation::Clip);
    assert_eq!(operators[2], ContentOperation::EndPath);

    // Second rectangle: even-odd clip.
    assert_eq!(operators[4], ContentOperation::ClipEvenOdd);
    assert_eq!(operators[5], ContentOperation::EndPath);
}
2570
/// Each of the eight operand-less painting operators must map to its own
/// `ContentOperation` variant, in stream order.
#[test]
fn test_parser_painting_operations() {
    let operators = ContentParser::parse(b"S s f f* B B* b b*").unwrap();

    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
    ];

    assert_eq!(operators.len(), 8);
    for (index, want) in expected.iter().enumerate() {
        assert_eq!(&operators[index], want);
    }
}
2586
/// Line width, cap, join and miter limit are checked by value. The trailing
/// `d` (dash pattern) operation is only counted, not inspected.
#[test]
fn test_parser_line_style_operations() {
    let operators = ContentParser::parse(b"5 w 1 J 2 j 10 M [ 3 2 ] 0 d").unwrap();

    let expected_prefix = [
        ContentOperation::SetLineWidth(5.0),
        ContentOperation::SetLineCap(1),
        ContentOperation::SetLineJoin(2),
        ContentOperation::SetMiterLimit(10.0),
    ];

    assert_eq!(operators.len(), 5);
    for (index, want) in expected_prefix.iter().enumerate() {
        assert_eq!(&operators[index], want);
    }
    // Checking the dash pattern itself would need array support.
}
2599
/// The five single-operand text state operators (`Tc`, `Tw`, `Tz`, `Tr`,
/// `Ts`) must each map to the matching variant with its operand.
#[test]
fn test_parser_text_state_operations() {
    let operators = ContentParser::parse(b"12 Tc 3 Tw 100 Tz 1 Tr 2 Ts").unwrap();

    let expected = [
        ContentOperation::SetCharSpacing(12.0),
        ContentOperation::SetWordSpacing(3.0),
        ContentOperation::SetHorizontalScaling(100.0),
        ContentOperation::SetTextRenderMode(1),
        ContentOperation::SetTextRise(2.0),
    ];

    assert_eq!(operators.len(), 5);
    for (index, want) in expected.iter().enumerate() {
        assert_eq!(&operators[index], want);
    }
}
2612
/// A literal string holding multi-byte UTF-8 sequences must survive parsing
/// as a `ShowText` between `BeginText` and `EndText`.
#[test]
fn test_parser_unicode_text() {
    let stream = b"BT (Hello \xC2\xA9 World \xE2\x9C\x93) Tj ET";
    let operators = ContentParser::parse(stream).unwrap();

    assert_eq!(operators.len(), 3);
    assert_eq!(operators[0], ContentOperation::BeginText);

    if let ContentOperation::ShowText(text) = &operators[1] {
        // Only a lower bound is checked: the payload must be longer than
        // five bytes, i.e. it was not dropped or truncated to nothing.
        assert!(text.len() > 5);
    } else {
        panic!("Expected ShowText operation");
    }

    assert_eq!(operators[2], ContentOperation::EndText);
}
2628
/// Large-magnitude operands with fractional parts must parse to values
/// within 0.1 of the written numbers.
#[test]
fn test_parser_stress_test_large_coordinates() {
    let stream = b"999999.999 -999999.999 999999.999 -999999.999 999999.999 -999999.999 c";
    let operators = ContentParser::parse(stream).unwrap();

    assert_eq!(operators.len(), 1);

    // Only x1, y1 and x3 are inspected; the remaining fields are ignored.
    if let ContentOperation::CurveTo(x1, y1, _, _, x3, _) = &operators[0] {
        assert!((*x1 - 999999.999).abs() < 0.1);
        assert!((*y1 - (-999999.999)).abs() < 0.1);
        assert!((*x3 - 999999.999).abs() < 0.1);
    } else {
        panic!("Expected CurveTo operation");
    }
}
2644
/// Both a zero-length stream and a whitespace-only stream must parse
/// successfully to an empty operation list.
#[test]
fn test_parser_empty_content_stream() {
    let from_empty = ContentParser::parse(b"").unwrap();
    assert!(from_empty.is_empty());

    let from_whitespace = ContentParser::parse(b"   \n\t\r   ").unwrap();
    assert!(from_whitespace.is_empty());
}
2655
/// Smoke test: a comment containing a raw `0xFF` byte directly adjacent to
/// text must not make the parser panic. Either `Ok` or `Err` is accepted.
#[test]
fn test_tokenizer_error_recovery() {
    let content = b"100 200 m % Comment with\xFFbinary\n150 250 l";

    // Deliberately no assertion on the outcome: `is_ok() || is_err()` is
    // always true, so the only thing this test can verify is that the call
    // returns instead of panicking.
    let _ = ContentParser::parse(content);
}
2664
/// Parses 1000 generated `m` operations and checks both the operation count
/// and that the parse finishes within a 200 ms wall-clock budget.
#[test]
fn test_parser_optimization_repeated_operations() {
    let content: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i * 2).into_bytes())
        .collect();

    let timer = std::time::Instant::now();
    let operators = ContentParser::parse(&content).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(operators.len(), 1000);
    // Wall-clock bound; only the parse call itself is timed.
    assert!(elapsed.as_millis() < 200);
}
2680
/// A 10 000-byte literal string must come through `Tj` with its full length
/// intact.
#[test]
fn test_parser_memory_efficiency_large_strings() {
    let content = format!("BT ({}) Tj ET", "A".repeat(10000));
    let operators = ContentParser::parse(content.as_bytes()).unwrap();

    assert_eq!(operators.len(), 3);

    if let ContentOperation::ShowText(text) = &operators[1] {
        assert_eq!(text.len(), 10000);
    } else {
        panic!("Expected ShowText operation");
    }
}
2696    }
2697
/// Parses a stream of 10 000 `m` operations followed by one `S`. Despite
/// the name, the stream is expected to parse successfully.
#[test]
fn test_content_stream_too_large() {
    let mut large_content: Vec<u8> = (0..10000)
        .flat_map(|i| format!("{} {} m ", i, i).into_bytes())
        .collect();
    large_content.extend_from_slice(b"S");

    // A stream of this size must be handled without panicking or failing.
    let result = ContentParser::parse_content(&large_content);
    assert!(result.is_ok());

    // All the MoveTo operations plus the final Stroke.
    let operation_count = result.unwrap().len();
    assert!(operation_count > 10000);
}
2717
/// An unknown operator in the middle of a stream may either fail the parse
/// or be tolerated; if tolerated, the valid `m` after it must still appear.
#[test]
fn test_invalid_operator_handling() {
    match ContentParser::parse_content(b"100 200 INVALID_OP 300 400 m") {
        Ok(operations) => {
            let has_move_to = operations
                .iter()
                .any(|op| matches!(op, ContentOperation::MoveTo(_, _)));
            assert!(has_move_to);
        }
        // Rejecting the whole stream is an accepted outcome as well.
        Err(_) => {}
    }
}
2732
/// Smoke test: a `TJ` operand with mismatched array and string delimiters
/// must not make the parser panic. Either `Ok` or `Err` is accepted.
#[test]
fn test_nested_arrays_malformed() {
    let content = b"[[(Hello] [World)]] TJ";

    // Deliberately no assertion on the outcome: `is_ok() || is_err()` is
    // always true, so the only thing this test can verify is that the call
    // returns instead of panicking.
    let _ = ContentParser::parse_content(content);
}
2742
/// Each literal string is followed by `Tj`; the resulting `ShowText` payload
/// must equal the string with its backslash escapes decoded.
#[test]
fn test_escape_sequences_in_strings() {
    // (content-stream literal, expected decoded bytes)
    let cases: [(&[u8], &[u8]); 5] = [
        (&b"(\\n\\r\\t)"[..], &b"\n\r\t"[..]),
        (&b"(\\\\)"[..], &b"\\"[..]),
        (&b"(\\(\\))"[..], &b"()"[..]),
        (&b"(\\123)"[..], &b"S"[..]), // octal 123 = decimal 83 = 'S'
        (&b"(\\0)"[..], &b"\0"[..]),
    ];

    for &(input, expected) in cases.iter() {
        let mut content = input.to_vec();
        content.extend_from_slice(b" Tj");

        let result = ContentParser::parse_content(&content);
        assert!(result.is_ok());

        let operations = result.unwrap();
        match &operations[0] {
            ContentOperation::ShowText(text) => {
                assert_eq!(text, expected, "Failed for input: {:?}", input);
            }
            _ => panic!("Expected ShowText operation"),
        }
    }
}
2770
/// Smoke test: an inline image (`BI ... ID <binary> EI`) must not make the
/// parser panic. Either `Ok` or `Err` is accepted.
#[test]
fn test_content_with_inline_images() {
    let content = b"BI /W 10 /H 10 /CS /RGB ID \x00\x01\x02\x03 EI";

    // Deliberately no assertion on the outcome: `is_ok() || is_err()` is
    // always true, so the only thing this test can verify is that the call
    // returns instead of panicking.
    let _ = ContentParser::parse_content(content);
}
2780
/// Smoke test: operators that appear with no operands at all must not make
/// the parser panic. Either `Ok` (skip) or `Err` is accepted for each.
#[test]
fn test_operator_with_missing_operands() {
    let test_cases: [&[u8]; 4] = [
        b"Tj", // ShowText without string
        b"m",  // MoveTo without coordinates
        b"rg", // SetRGBColor without values
        b"Tf", // SetFont without name and size
    ];

    for &content in test_cases.iter() {
        // Deliberately no assertion on the outcome: `is_ok() || is_err()`
        // is always true, so the only thing this loop can verify is that
        // each call returns instead of panicking.
        let _ = ContentParser::parse_content(content);
    }
}
2797}