oxidize_pdf/parser/
content.rs

1//! PDF Content Stream Parser - Complete support for PDF graphics operators
2//!
3//! This module implements comprehensive parsing of PDF content streams according to the PDF specification.
4//! Content streams contain the actual drawing instructions (operators) that render text, graphics, and images
5//! on PDF pages.
6//!
7//! # Overview
8//!
9//! Content streams are sequences of PDF operators that describe:
10//! - Text positioning and rendering
11//! - Path construction and painting
12//! - Color and graphics state management
13//! - Image and XObject placement
14//! - Coordinate transformations
15//!
16//! # Architecture
17//!
18//! The parser is divided into two main components:
19//! - `ContentTokenizer`: Low-level tokenization of content stream bytes
20//! - `ContentParser`: High-level parsing of tokens into structured operations
21//!
22//! # Example
23//!
24//! ```rust,no_run
25//! use oxidize_pdf::parser::content::{ContentParser, ContentOperation};
26//!
27//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
28//! // Parse a content stream
29//! let content_stream = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
30//! let operations = ContentParser::parse_content(content_stream)?;
31//!
32//! // Process operations
33//! for op in operations {
34//!     match op {
35//!         ContentOperation::BeginText => println!("Start text object"),
36//!         ContentOperation::SetFont(name, size) => println!("Font: {} at {}", name, size),
37//!         ContentOperation::ShowText(text) => println!("Text: {:?}", text),
38//!         _ => {}
39//!     }
40//! }
41//! # Ok(())
42//! # }
43//! ```
44//!
45//! # Supported Operators
46//!
47//! This parser supports all standard PDF operators including:
48//! - Text operators (BT, ET, Tj, TJ, Tf, Td, etc.)
49//! - Graphics state operators (q, Q, cm, w, J, etc.)
50//! - Path construction operators (m, l, c, re, h)
51//! - Path painting operators (S, f, B, n, etc.)
52//! - Color operators (g, rg, k, cs, scn, etc.)
53//! - XObject operators (Do)
54//! - Marked content operators (BMC, BDC, EMC, etc.)
55
56use super::{ParseError, ParseResult};
57use crate::objects::Object;
58use std::collections::HashMap;
59
60/// Represents a single operator in a PDF content stream.
61///
62/// Each variant corresponds to a specific PDF operator and carries the associated
63/// operands. These operations form a complete instruction set for rendering PDF content.
64///
65/// # Categories
66///
67/// Operations are grouped into several categories:
68/// - **Text Object**: BeginText, EndText
69/// - **Text State**: Font, spacing, scaling, rendering mode
70/// - **Text Positioning**: Matrix transforms, moves, line advances
71/// - **Text Showing**: Display text with various formatting
72/// - **Graphics State**: Save/restore, transforms, line properties
73/// - **Path Construction**: Move, line, curve, rectangle operations
74/// - **Path Painting**: Stroke, fill, clipping operations
75/// - **Color**: RGB, CMYK, grayscale, and color space operations
76/// - **XObject**: External graphics and form placement
77/// - **Marked Content**: Semantic tagging for accessibility
78///
79/// # Example
80///
81/// ```rust
82/// use oxidize_pdf::parser::content::{ContentOperation};
83///
84/// // Text operation
85/// let op1 = ContentOperation::ShowText(b"Hello".to_vec());
86///
87/// // Graphics operation
88/// let op2 = ContentOperation::SetLineWidth(2.0);
89///
90/// // Path operation
91/// let op3 = ContentOperation::Rectangle(10.0, 10.0, 100.0, 50.0);
92/// ```
93#[derive(Debug, Clone, PartialEq)]
94pub enum ContentOperation {
95    // Text object operators
96    /// Begin a text object (BT operator).
97    /// All text showing operations must occur within a text object.
98    BeginText,
99
100    /// End a text object (ET operator).
101    /// Closes the current text object started with BeginText.
102    EndText,
103
104    // Text state operators
105    /// Set character spacing (Tc operator).
106    /// Additional space between characters in unscaled text units.
107    SetCharSpacing(f32),
108
109    /// Set word spacing (Tw operator).
110    /// Additional space for ASCII space character (0x20) in unscaled text units.
111    SetWordSpacing(f32),
112
113    /// Set horizontal text scaling (Tz operator).
114    /// Percentage of normal width (100 = normal).
115    SetHorizontalScaling(f32),
116
117    /// Set text leading (TL operator).
118    /// Vertical distance between baselines for T* operator.
119    SetLeading(f32),
120
121    /// Set font and size (Tf operator).
122    /// Font name must match a key in the Resources/Font dictionary.
123    SetFont(String, f32),
124
125    /// Set text rendering mode (Tr operator).
126    /// 0=fill, 1=stroke, 2=fill+stroke, 3=invisible, 4=fill+clip, 5=stroke+clip, 6=fill+stroke+clip, 7=clip
127    SetTextRenderMode(i32),
128
129    /// Set text rise (Ts operator).
130    /// Vertical displacement for superscripts/subscripts in text units.
131    SetTextRise(f32),
132
133    // Text positioning operators
134    /// Move text position (Td operator).
135    /// Translates the text matrix by (tx, ty).
136    MoveText(f32, f32),
137
138    /// Move text position and set leading (TD operator).
139    /// Equivalent to: -ty TL tx ty Td
140    MoveTextSetLeading(f32, f32),
141
142    /// Set text matrix directly (Tm operator).
143    /// Parameters: [a, b, c, d, e, f] for transformation matrix.
144    SetTextMatrix(f32, f32, f32, f32, f32, f32),
145
146    /// Move to start of next line (T* operator).
147    /// Uses the current leading value set with TL.
148    NextLine,
149
150    // Text showing operators
151    /// Show text string (Tj operator).
152    /// The bytes are encoded according to the current font's encoding.
153    ShowText(Vec<u8>),
154
155    /// Show text with individual positioning (TJ operator).
156    /// Array elements can be strings or position adjustments.
157    ShowTextArray(Vec<TextElement>),
158
159    /// Move to next line and show text (' operator).
160    /// Equivalent to: T* string Tj
161    NextLineShowText(Vec<u8>),
162
163    /// Set spacing, move to next line, and show text (" operator).
164    /// Equivalent to: word_spacing Tw char_spacing Tc string '
165    SetSpacingNextLineShowText(f32, f32, Vec<u8>),
166
167    // Graphics state operators
168    /// Save current graphics state (q operator).
169    /// Pushes the entire graphics state onto a stack.
170    SaveGraphicsState,
171
172    /// Restore graphics state (Q operator).
173    /// Pops the graphics state from the stack.
174    RestoreGraphicsState,
175
176    /// Concatenate matrix to current transformation matrix (cm operator).
177    /// Modifies the CTM: CTM' = CTM × [a b c d e f]
178    SetTransformMatrix(f32, f32, f32, f32, f32, f32),
179
180    /// Set line width (w operator) in user space units.
181    SetLineWidth(f32),
182
183    /// Set line cap style (J operator).
184    /// 0=butt cap, 1=round cap, 2=projecting square cap
185    SetLineCap(i32),
186
187    /// Set line join style (j operator).
188    /// 0=miter join, 1=round join, 2=bevel join
189    SetLineJoin(i32),
190
191    /// Set miter limit (M operator).
192    /// Maximum ratio of miter length to line width.
193    SetMiterLimit(f32),
194
195    /// Set dash pattern (d operator).
196    /// Array of dash/gap lengths and starting phase.
197    SetDashPattern(Vec<f32>, f32),
198
199    /// Set rendering intent (ri operator).
200    /// Color rendering intent: /AbsoluteColorimetric, /RelativeColorimetric, /Saturation, /Perceptual
201    SetIntent(String),
202
203    /// Set flatness tolerance (i operator).
204    /// Maximum error when rendering curves as line segments.
205    SetFlatness(f32),
206
207    /// Set graphics state from parameter dictionary (gs operator).
208    /// References ExtGState resource dictionary.
209    SetGraphicsStateParams(String),
210
211    // Path construction operators
212    /// Begin new subpath at point (m operator).
213    MoveTo(f32, f32),
214
215    /// Append straight line segment (l operator).
216    LineTo(f32, f32),
217
218    /// Append cubic Bézier curve (c operator).
219    /// Control points: (x1,y1), (x2,y2), endpoint: (x3,y3)
220    CurveTo(f32, f32, f32, f32, f32, f32),
221
222    /// Append cubic Bézier curve with first control point = current point (v operator).
223    CurveToV(f32, f32, f32, f32),
224
225    /// Append cubic Bézier curve with second control point = endpoint (y operator).
226    CurveToY(f32, f32, f32, f32),
227
228    /// Close current subpath (h operator).
229    /// Appends straight line to starting point.
230    ClosePath,
231
232    /// Append rectangle as complete subpath (re operator).
233    /// Parameters: x, y, width, height
234    Rectangle(f32, f32, f32, f32),
235
236    // Path painting operators
237    /// Stroke the path (S operator).
238    Stroke,
239
240    /// Close and stroke the path (s operator).
241    /// Equivalent to: h S
242    CloseStroke,
243
244    /// Fill the path using nonzero winding rule (f or F operator).
245    Fill,
246
247    /// Fill the path using even-odd rule (f* operator).
248    FillEvenOdd,
249
250    /// Fill then stroke the path (B operator).
251    /// Uses nonzero winding rule.
252    FillStroke,
253
254    /// Fill then stroke using even-odd rule (B* operator).
255    FillStrokeEvenOdd,
256
257    /// Close, fill, and stroke the path (b operator).
258    /// Equivalent to: h B
259    CloseFillStroke,
260
261    /// Close, fill, and stroke using even-odd rule (b* operator).
262    CloseFillStrokeEvenOdd,
263
264    /// End path without filling or stroking (n operator).
265    /// Used primarily before clipping.
266    EndPath,
267
268    // Clipping path operators
269    Clip,        // W
270    ClipEvenOdd, // W*
271
272    // Color operators
273    /// Set stroking color space (CS operator).
274    /// References ColorSpace resource dictionary.
275    SetStrokingColorSpace(String),
276
277    /// Set non-stroking color space (cs operator).
278    /// References ColorSpace resource dictionary.
279    SetNonStrokingColorSpace(String),
280
281    /// Set stroking color (SC, SCN operators).
282    /// Number of components depends on current color space.
283    SetStrokingColor(Vec<f32>),
284
285    /// Set non-stroking color (sc, scn operators).
286    /// Number of components depends on current color space.
287    SetNonStrokingColor(Vec<f32>),
288
289    /// Set stroking color to DeviceGray (G operator).
290    /// 0.0 = black, 1.0 = white
291    SetStrokingGray(f32),
292
293    /// Set non-stroking color to DeviceGray (g operator).
294    SetNonStrokingGray(f32),
295
296    /// Set stroking color to DeviceRGB (RG operator).
297    /// Components range from 0.0 to 1.0.
298    SetStrokingRGB(f32, f32, f32),
299
300    /// Set non-stroking color to DeviceRGB (rg operator).
301    SetNonStrokingRGB(f32, f32, f32),
302
303    /// Set stroking color to DeviceCMYK (K operator).
304    SetStrokingCMYK(f32, f32, f32, f32),
305
306    /// Set non-stroking color to DeviceCMYK (k operator).
307    SetNonStrokingCMYK(f32, f32, f32, f32),
308
309    // Shading operators
310    ShadingFill(String), // sh
311
312    // Inline image operators
313    /// Begin inline image (BI operator)
314    BeginInlineImage,
315    /// Inline image with parsed dictionary and data
316    InlineImage {
317        /// Image parameters (width, height, colorspace, etc.)
318        params: HashMap<String, Object>,
319        /// Raw image data
320        data: Vec<u8>,
321    },
322
323    // XObject operators
324    /// Paint external object (Do operator).
325    /// References XObject resource dictionary (images, forms).
326    PaintXObject(String),
327
328    // Marked content operators
329    BeginMarkedContent(String),                                   // BMC
330    BeginMarkedContentWithProps(String, HashMap<String, String>), // BDC
331    EndMarkedContent,                                             // EMC
332    DefineMarkedContentPoint(String),                             // MP
333    DefineMarkedContentPointWithProps(String, HashMap<String, String>), // DP
334
335    // Compatibility operators
336    BeginCompatibility, // BX
337    EndCompatibility,   // EX
338}
339
/// A single item of the array operand taken by the `TJ` operator.
///
/// A `TJ` array interleaves strings to show with numeric adjustments that
/// shift the text position between them, which gives per-glyph control over
/// spacing (kerning, justification).
///
/// # Example
///
/// ```rust
/// use oxidize_pdf::parser::content::{ContentOperation, TextElement};
///
/// // Equivalent of the TJ array `[(Hello) -50 (World)]`.
/// let items = vec![
///     TextElement::Text(b"Hello".to_vec()),
///     TextElement::Spacing(-50.0), // widens the gap by 50/1000 text units
///     TextElement::Text(b"World".to_vec()),
/// ];
/// let op = ContentOperation::ShowTextArray(items);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// Raw string bytes to show.
    Text(Vec<u8>),
    /// Position adjustment in thousandths of a text space unit.
    ///
    /// The amount is subtracted from the current text position, so in
    /// horizontal writing a positive value moves the next glyph to the left
    /// (tighter) and a negative value moves it to the right (looser).
    Spacing(f32),
}
366
367/// Token types in content streams
368#[derive(Debug, Clone, PartialEq)]
369pub(super) enum Token {
370    Number(f32),
371    Integer(i32),
372    String(Vec<u8>),
373    HexString(Vec<u8>),
374    Name(String),
375    Operator(String),
376    ArrayStart,
377    ArrayEnd,
378    DictStart,
379    DictEnd,
380}
381
/// Low-level scanner that splits raw content stream bytes into tokens.
///
/// The tokenizer borrows the input for its whole lifetime and keeps a byte
/// cursor into it; no input bytes are copied until a token is produced.
pub struct ContentTokenizer<'a> {
    /// Bytes of the content stream being scanned.
    input: &'a [u8],
    /// Offset into `input` of the next byte to examine.
    position: usize,
}
387
388impl<'a> ContentTokenizer<'a> {
389    /// Create a new tokenizer for the given input
390    pub fn new(input: &'a [u8]) -> Self {
391        Self { input, position: 0 }
392    }
393
394    /// Get the next token from the stream
395    pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
396        self.skip_whitespace();
397
398        if self.position >= self.input.len() {
399            return Ok(None);
400        }
401
402        let ch = self.input[self.position];
403
404        match ch {
405            // Numbers
406            b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
407
408            // Strings
409            b'(' => self.read_literal_string(),
410            b'<' => {
411                if self.peek_next() == Some(b'<') {
412                    self.position += 2;
413                    Ok(Some(Token::DictStart))
414                } else {
415                    self.read_hex_string()
416                }
417            }
418            b'>' => {
419                if self.peek_next() == Some(b'>') {
420                    self.position += 2;
421                    Ok(Some(Token::DictEnd))
422                } else {
423                    Err(ParseError::SyntaxError {
424                        position: self.position,
425                        message: "Unexpected '>'".to_string(),
426                    })
427                }
428            }
429
430            // Arrays
431            b'[' => {
432                self.position += 1;
433                Ok(Some(Token::ArrayStart))
434            }
435            b']' => {
436                self.position += 1;
437                Ok(Some(Token::ArrayEnd))
438            }
439
440            // Names
441            b'/' => self.read_name(),
442
443            // Operators or other tokens
444            _ => self.read_operator(),
445        }
446    }
447
448    fn skip_whitespace(&mut self) {
449        while self.position < self.input.len() {
450            match self.input[self.position] {
451                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
452                b'%' => self.skip_comment(),
453                _ => break,
454            }
455        }
456    }
457
458    fn skip_comment(&mut self) {
459        while self.position < self.input.len() && self.input[self.position] != b'\n' {
460            self.position += 1;
461        }
462    }
463
464    fn peek_next(&self) -> Option<u8> {
465        if self.position + 1 < self.input.len() {
466            Some(self.input[self.position + 1])
467        } else {
468            None
469        }
470    }
471
472    fn read_number(&mut self) -> ParseResult<Option<Token>> {
473        let start = self.position;
474        let mut has_dot = false;
475
476        // Handle optional sign
477        if self.position < self.input.len()
478            && (self.input[self.position] == b'+' || self.input[self.position] == b'-')
479        {
480            self.position += 1;
481        }
482
483        // Read digits and optional decimal point
484        while self.position < self.input.len() {
485            match self.input[self.position] {
486                b'0'..=b'9' => self.position += 1,
487                b'.' if !has_dot => {
488                    has_dot = true;
489                    self.position += 1;
490                }
491                _ => break,
492            }
493        }
494
495        let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
496            ParseError::SyntaxError {
497                position: start,
498                message: "Invalid number format".to_string(),
499            }
500        })?;
501
502        if has_dot {
503            let value = num_str
504                .parse::<f32>()
505                .map_err(|_| ParseError::SyntaxError {
506                    position: start,
507                    message: "Invalid float number".to_string(),
508                })?;
509            Ok(Some(Token::Number(value)))
510        } else {
511            let value = num_str
512                .parse::<i32>()
513                .map_err(|_| ParseError::SyntaxError {
514                    position: start,
515                    message: "Invalid integer number".to_string(),
516                })?;
517            Ok(Some(Token::Integer(value)))
518        }
519    }
520
521    fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
522        self.position += 1; // Skip opening '('
523        let mut result = Vec::new();
524        let mut paren_depth = 1;
525        let mut escape = false;
526
527        while self.position < self.input.len() && paren_depth > 0 {
528            let ch = self.input[self.position];
529            self.position += 1;
530
531            if escape {
532                match ch {
533                    b'n' => result.push(b'\n'),
534                    b'r' => result.push(b'\r'),
535                    b't' => result.push(b'\t'),
536                    b'b' => result.push(b'\x08'),
537                    b'f' => result.push(b'\x0C'),
538                    b'(' => result.push(b'('),
539                    b')' => result.push(b')'),
540                    b'\\' => result.push(b'\\'),
541                    b'0'..=b'7' => {
542                        // Octal escape sequence
543                        self.position -= 1;
544                        let octal_value = self.read_octal_escape()?;
545                        result.push(octal_value);
546                    }
547                    _ => result.push(ch), // Unknown escape, treat as literal
548                }
549                escape = false;
550            } else {
551                match ch {
552                    b'\\' => escape = true,
553                    b'(' => {
554                        paren_depth += 1;
555                        result.push(ch);
556                    }
557                    b')' => {
558                        paren_depth -= 1;
559                        if paren_depth > 0 {
560                            result.push(ch);
561                        }
562                    }
563                    _ => result.push(ch),
564                }
565            }
566        }
567
568        Ok(Some(Token::String(result)))
569    }
570
571    fn read_octal_escape(&mut self) -> ParseResult<u8> {
572        let mut value = 0u8;
573        let mut count = 0;
574
575        while count < 3 && self.position < self.input.len() {
576            match self.input[self.position] {
577                b'0'..=b'7' => {
578                    value = value * 8 + (self.input[self.position] - b'0');
579                    self.position += 1;
580                    count += 1;
581                }
582                _ => break,
583            }
584        }
585
586        Ok(value)
587    }
588
589    fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
590        self.position += 1; // Skip opening '<'
591        let mut result = Vec::new();
592        let mut nibble = None;
593
594        while self.position < self.input.len() {
595            let ch = self.input[self.position];
596
597            match ch {
598                b'>' => {
599                    self.position += 1;
600                    // Handle odd number of hex digits
601                    if let Some(n) = nibble {
602                        result.push(n << 4);
603                    }
604                    return Ok(Some(Token::HexString(result)));
605                }
606                b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
607                    let digit = if ch <= b'9' {
608                        ch - b'0'
609                    } else if ch <= b'F' {
610                        ch - b'A' + 10
611                    } else {
612                        ch - b'a' + 10
613                    };
614
615                    if let Some(n) = nibble {
616                        result.push((n << 4) | digit);
617                        nibble = None;
618                    } else {
619                        nibble = Some(digit);
620                    }
621                    self.position += 1;
622                }
623                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
624                    // Skip whitespace in hex strings
625                    self.position += 1;
626                }
627                _ => {
628                    return Err(ParseError::SyntaxError {
629                        position: self.position,
630                        message: format!("Invalid character in hex string: {:?}", ch as char),
631                    });
632                }
633            }
634        }
635
636        Err(ParseError::SyntaxError {
637            position: self.position,
638            message: "Unterminated hex string".to_string(),
639        })
640    }
641
642    fn read_name(&mut self) -> ParseResult<Option<Token>> {
643        self.position += 1; // Skip '/'
644        let start = self.position;
645
646        while self.position < self.input.len() {
647            let ch = self.input[self.position];
648            match ch {
649                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
650                | b']' | b'{' | b'}' | b'/' | b'%' => break,
651                b'#' => {
652                    // Handle hex escape in name
653                    self.position += 1;
654                    if self.position + 1 < self.input.len() {
655                        self.position += 2;
656                    }
657                }
658                _ => self.position += 1,
659            }
660        }
661
662        let name_bytes = &self.input[start..self.position];
663        let name = self.decode_name(name_bytes)?;
664        Ok(Some(Token::Name(name)))
665    }
666
667    fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
668        let mut result = Vec::new();
669        let mut i = 0;
670
671        while i < bytes.len() {
672            if bytes[i] == b'#' && i + 2 < bytes.len() {
673                // Hex escape
674                let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
675                    ParseError::SyntaxError {
676                        position: self.position,
677                        message: "Invalid hex escape in name".to_string(),
678                    }
679                })?;
680                let value =
681                    u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
682                        position: self.position,
683                        message: "Invalid hex escape in name".to_string(),
684                    })?;
685                result.push(value);
686                i += 3;
687            } else {
688                result.push(bytes[i]);
689                i += 1;
690            }
691        }
692
693        String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
694            position: self.position,
695            message: "Invalid UTF-8 in name".to_string(),
696        })
697    }
698
699    fn read_operator(&mut self) -> ParseResult<Option<Token>> {
700        let start = self.position;
701
702        while self.position < self.input.len() {
703            let ch = self.input[self.position];
704            match ch {
705                b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
706                | b']' | b'{' | b'}' | b'/' | b'%' => break,
707                _ => self.position += 1,
708            }
709        }
710
711        let op_bytes = &self.input[start..self.position];
712        let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
713            position: start,
714            message: "Invalid operator".to_string(),
715        })?;
716
717        Ok(Some(Token::Operator(op.to_string())))
718    }
719}
720
721/// High-level content stream parser.
722///
723/// Converts tokenized content streams into structured `ContentOperation` values.
724/// This parser handles the operand stack and operator parsing according to PDF specifications.
725///
726/// # Usage
727///
728/// The parser is typically used through its static methods:
729///
730/// ```rust
731/// use oxidize_pdf::parser::content::ContentParser;
732///
733/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
734/// let content = b"q 1 0 0 1 50 50 cm 100 100 200 150 re S Q";
735/// let operations = ContentParser::parse(content)?;
736/// # Ok(())
737/// # }
738/// ```
739pub struct ContentParser {
740    tokens: Vec<Token>,
741    position: usize,
742}
743
744impl ContentParser {
745    /// Create a new content parser
746    pub fn new(_content: &[u8]) -> Self {
747        Self {
748            tokens: Vec::new(),
749            position: 0,
750        }
751    }
752
753    /// Parse a content stream into a vector of operators.
754    ///
755    /// This is a convenience method that creates a parser and processes the entire stream.
756    ///
757    /// # Arguments
758    ///
759    /// * `content` - Raw content stream bytes (may be compressed)
760    ///
761    /// # Returns
762    ///
763    /// A vector of parsed `ContentOperation` values in the order they appear.
764    ///
765    /// # Errors
766    ///
767    /// Returns an error if:
768    /// - Invalid operator syntax is encountered
769    /// - Operators have incorrect number/type of operands
770    /// - Unknown operators are found
771    ///
772    /// # Example
773    ///
774    /// ```rust
775    /// use oxidize_pdf::parser::content::{ContentParser, ContentOperation};
776    ///
777    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
778    /// let content = b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET";
779    /// let operations = ContentParser::parse(content)?;
780    ///
781    /// assert_eq!(operations.len(), 5);
782    /// assert!(matches!(operations[0], ContentOperation::BeginText));
783    /// # Ok(())
784    /// # }
785    /// ```
786    pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
787        Self::parse_content(content)
788    }
789
790    /// Parse a content stream into a vector of operators.
791    ///
792    /// This method tokenizes the input and converts it to operations.
793    /// It handles the PDF postfix notation where operands precede operators.
794    pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
795        let mut tokenizer = ContentTokenizer::new(content);
796        let mut tokens = Vec::new();
797
798        // Tokenize the entire stream
799        while let Some(token) = tokenizer.next_token()? {
800            tokens.push(token);
801        }
802
803        let mut parser = Self {
804            tokens,
805            position: 0,
806        };
807
808        parser.parse_operators()
809    }
810
811    fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
812        let mut operators = Vec::new();
813        let mut operand_stack: Vec<Token> = Vec::new();
814
815        while self.position < self.tokens.len() {
816            let token = self.tokens[self.position].clone();
817            self.position += 1;
818
819            match &token {
820                Token::Operator(op) => {
821                    let operator = self.parse_operator(op, &mut operand_stack)?;
822                    operators.push(operator);
823                }
824                _ => {
825                    // Not an operator, push to operand stack
826                    operand_stack.push(token);
827                }
828            }
829        }
830
831        Ok(operators)
832    }
833
834    fn parse_operator(
835        &mut self,
836        op: &str,
837        operands: &mut Vec<Token>,
838    ) -> ParseResult<ContentOperation> {
839        let operator = match op {
840            // Text object operators
841            "BT" => ContentOperation::BeginText,
842            "ET" => ContentOperation::EndText,
843
844            // Text state operators
845            "Tc" => {
846                let spacing = self.pop_number(operands)?;
847                ContentOperation::SetCharSpacing(spacing)
848            }
849            "Tw" => {
850                let spacing = self.pop_number(operands)?;
851                ContentOperation::SetWordSpacing(spacing)
852            }
853            "Tz" => {
854                let scale = self.pop_number(operands)?;
855                ContentOperation::SetHorizontalScaling(scale)
856            }
857            "TL" => {
858                let leading = self.pop_number(operands)?;
859                ContentOperation::SetLeading(leading)
860            }
861            "Tf" => {
862                let size = self.pop_number(operands)?;
863                let font = self.pop_name(operands)?;
864                ContentOperation::SetFont(font, size)
865            }
866            "Tr" => {
867                let mode = self.pop_integer(operands)?;
868                ContentOperation::SetTextRenderMode(mode)
869            }
870            "Ts" => {
871                let rise = self.pop_number(operands)?;
872                ContentOperation::SetTextRise(rise)
873            }
874
875            // Text positioning operators
876            "Td" => {
877                let ty = self.pop_number(operands)?;
878                let tx = self.pop_number(operands)?;
879                ContentOperation::MoveText(tx, ty)
880            }
881            "TD" => {
882                let ty = self.pop_number(operands)?;
883                let tx = self.pop_number(operands)?;
884                ContentOperation::MoveTextSetLeading(tx, ty)
885            }
886            "Tm" => {
887                let f = self.pop_number(operands)?;
888                let e = self.pop_number(operands)?;
889                let d = self.pop_number(operands)?;
890                let c = self.pop_number(operands)?;
891                let b = self.pop_number(operands)?;
892                let a = self.pop_number(operands)?;
893                ContentOperation::SetTextMatrix(a, b, c, d, e, f)
894            }
895            "T*" => ContentOperation::NextLine,
896
897            // Text showing operators
898            "Tj" => {
899                let text = self.pop_string(operands)?;
900                ContentOperation::ShowText(text)
901            }
902            "TJ" => {
903                let array = self.pop_array(operands)?;
904                let elements = self.parse_text_array(array)?;
905                ContentOperation::ShowTextArray(elements)
906            }
907            "'" => {
908                let text = self.pop_string(operands)?;
909                ContentOperation::NextLineShowText(text)
910            }
911            "\"" => {
912                let text = self.pop_string(operands)?;
913                let aw = self.pop_number(operands)?;
914                let ac = self.pop_number(operands)?;
915                ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
916            }
917
918            // Graphics state operators
919            "q" => ContentOperation::SaveGraphicsState,
920            "Q" => ContentOperation::RestoreGraphicsState,
921            "cm" => {
922                let f = self.pop_number(operands)?;
923                let e = self.pop_number(operands)?;
924                let d = self.pop_number(operands)?;
925                let c = self.pop_number(operands)?;
926                let b = self.pop_number(operands)?;
927                let a = self.pop_number(operands)?;
928                ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
929            }
930            "w" => {
931                let width = self.pop_number(operands)?;
932                ContentOperation::SetLineWidth(width)
933            }
934            "J" => {
935                let cap = self.pop_integer(operands)?;
936                ContentOperation::SetLineCap(cap)
937            }
938            "j" => {
939                let join = self.pop_integer(operands)?;
940                ContentOperation::SetLineJoin(join)
941            }
942            "M" => {
943                let limit = self.pop_number(operands)?;
944                ContentOperation::SetMiterLimit(limit)
945            }
946            "d" => {
947                let phase = self.pop_number(operands)?;
948                let array = self.pop_array(operands)?;
949                let pattern = self.parse_dash_array(array)?;
950                ContentOperation::SetDashPattern(pattern, phase)
951            }
952            "ri" => {
953                let intent = self.pop_name(operands)?;
954                ContentOperation::SetIntent(intent)
955            }
956            "i" => {
957                let flatness = self.pop_number(operands)?;
958                ContentOperation::SetFlatness(flatness)
959            }
960            "gs" => {
961                let name = self.pop_name(operands)?;
962                ContentOperation::SetGraphicsStateParams(name)
963            }
964
965            // Path construction operators
966            "m" => {
967                let y = self.pop_number(operands)?;
968                let x = self.pop_number(operands)?;
969                ContentOperation::MoveTo(x, y)
970            }
971            "l" => {
972                let y = self.pop_number(operands)?;
973                let x = self.pop_number(operands)?;
974                ContentOperation::LineTo(x, y)
975            }
976            "c" => {
977                let y3 = self.pop_number(operands)?;
978                let x3 = self.pop_number(operands)?;
979                let y2 = self.pop_number(operands)?;
980                let x2 = self.pop_number(operands)?;
981                let y1 = self.pop_number(operands)?;
982                let x1 = self.pop_number(operands)?;
983                ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
984            }
985            "v" => {
986                let y3 = self.pop_number(operands)?;
987                let x3 = self.pop_number(operands)?;
988                let y2 = self.pop_number(operands)?;
989                let x2 = self.pop_number(operands)?;
990                ContentOperation::CurveToV(x2, y2, x3, y3)
991            }
992            "y" => {
993                let y3 = self.pop_number(operands)?;
994                let x3 = self.pop_number(operands)?;
995                let y1 = self.pop_number(operands)?;
996                let x1 = self.pop_number(operands)?;
997                ContentOperation::CurveToY(x1, y1, x3, y3)
998            }
999            "h" => ContentOperation::ClosePath,
1000            "re" => {
1001                let height = self.pop_number(operands)?;
1002                let width = self.pop_number(operands)?;
1003                let y = self.pop_number(operands)?;
1004                let x = self.pop_number(operands)?;
1005                ContentOperation::Rectangle(x, y, width, height)
1006            }
1007
1008            // Path painting operators
1009            "S" => ContentOperation::Stroke,
1010            "s" => ContentOperation::CloseStroke,
1011            "f" | "F" => ContentOperation::Fill,
1012            "f*" => ContentOperation::FillEvenOdd,
1013            "B" => ContentOperation::FillStroke,
1014            "B*" => ContentOperation::FillStrokeEvenOdd,
1015            "b" => ContentOperation::CloseFillStroke,
1016            "b*" => ContentOperation::CloseFillStrokeEvenOdd,
1017            "n" => ContentOperation::EndPath,
1018
1019            // Clipping path operators
1020            "W" => ContentOperation::Clip,
1021            "W*" => ContentOperation::ClipEvenOdd,
1022
1023            // Color operators
1024            "CS" => {
1025                let name = self.pop_name(operands)?;
1026                ContentOperation::SetStrokingColorSpace(name)
1027            }
1028            "cs" => {
1029                let name = self.pop_name(operands)?;
1030                ContentOperation::SetNonStrokingColorSpace(name)
1031            }
1032            "SC" | "SCN" => {
1033                let components = self.pop_color_components(operands)?;
1034                ContentOperation::SetStrokingColor(components)
1035            }
1036            "sc" | "scn" => {
1037                let components = self.pop_color_components(operands)?;
1038                ContentOperation::SetNonStrokingColor(components)
1039            }
1040            "G" => {
1041                let gray = self.pop_number(operands)?;
1042                ContentOperation::SetStrokingGray(gray)
1043            }
1044            "g" => {
1045                let gray = self.pop_number(operands)?;
1046                ContentOperation::SetNonStrokingGray(gray)
1047            }
1048            "RG" => {
1049                let b = self.pop_number(operands)?;
1050                let g = self.pop_number(operands)?;
1051                let r = self.pop_number(operands)?;
1052                ContentOperation::SetStrokingRGB(r, g, b)
1053            }
1054            "rg" => {
1055                let b = self.pop_number(operands)?;
1056                let g = self.pop_number(operands)?;
1057                let r = self.pop_number(operands)?;
1058                ContentOperation::SetNonStrokingRGB(r, g, b)
1059            }
1060            "K" => {
1061                let k = self.pop_number(operands)?;
1062                let y = self.pop_number(operands)?;
1063                let m = self.pop_number(operands)?;
1064                let c = self.pop_number(operands)?;
1065                ContentOperation::SetStrokingCMYK(c, m, y, k)
1066            }
1067            "k" => {
1068                let k = self.pop_number(operands)?;
1069                let y = self.pop_number(operands)?;
1070                let m = self.pop_number(operands)?;
1071                let c = self.pop_number(operands)?;
1072                ContentOperation::SetNonStrokingCMYK(c, m, y, k)
1073            }
1074
1075            // Shading operators
1076            "sh" => {
1077                let name = self.pop_name(operands)?;
1078                ContentOperation::ShadingFill(name)
1079            }
1080
1081            // XObject operators
1082            "Do" => {
1083                let name = self.pop_name(operands)?;
1084                ContentOperation::PaintXObject(name)
1085            }
1086
1087            // Marked content operators
1088            "BMC" => {
1089                let tag = self.pop_name(operands)?;
1090                ContentOperation::BeginMarkedContent(tag)
1091            }
1092            "BDC" => {
1093                let props = self.pop_dict_or_name(operands)?;
1094                let tag = self.pop_name(operands)?;
1095                ContentOperation::BeginMarkedContentWithProps(tag, props)
1096            }
1097            "EMC" => ContentOperation::EndMarkedContent,
1098            "MP" => {
1099                let tag = self.pop_name(operands)?;
1100                ContentOperation::DefineMarkedContentPoint(tag)
1101            }
1102            "DP" => {
1103                let props = self.pop_dict_or_name(operands)?;
1104                let tag = self.pop_name(operands)?;
1105                ContentOperation::DefineMarkedContentPointWithProps(tag, props)
1106            }
1107
1108            // Compatibility operators
1109            "BX" => ContentOperation::BeginCompatibility,
1110            "EX" => ContentOperation::EndCompatibility,
1111
1112            // Inline images are handled specially
1113            "BI" => {
1114                operands.clear(); // Clear any remaining operands
1115                self.parse_inline_image()?
1116            }
1117
1118            _ => {
1119                return Err(ParseError::SyntaxError {
1120                    position: self.position,
1121                    message: format!("Unknown operator: {op}"),
1122                });
1123            }
1124        };
1125
1126        operands.clear(); // Clear operands after processing
1127        Ok(operator)
1128    }
1129
1130    // Helper methods for popping operands
1131    fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
1132        match operands.pop() {
1133            Some(Token::Number(n)) => Ok(n),
1134            Some(Token::Integer(i)) => Ok(i as f32),
1135            _ => Err(ParseError::SyntaxError {
1136                position: self.position,
1137                message: "Expected number operand".to_string(),
1138            }),
1139        }
1140    }
1141
1142    fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
1143        match operands.pop() {
1144            Some(Token::Integer(i)) => Ok(i),
1145            _ => Err(ParseError::SyntaxError {
1146                position: self.position,
1147                message: "Expected integer operand".to_string(),
1148            }),
1149        }
1150    }
1151
1152    fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
1153        match operands.pop() {
1154            Some(Token::Name(n)) => Ok(n),
1155            _ => Err(ParseError::SyntaxError {
1156                position: self.position,
1157                message: "Expected name operand".to_string(),
1158            }),
1159        }
1160    }
1161
1162    fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
1163        match operands.pop() {
1164            Some(Token::String(s)) => Ok(s),
1165            Some(Token::HexString(s)) => Ok(s),
1166            _ => Err(ParseError::SyntaxError {
1167                position: self.position,
1168                message: "Expected string operand".to_string(),
1169            }),
1170        }
1171    }
1172
1173    fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
1174        // First check if we have an ArrayEnd at the top (which we should for a complete array)
1175        let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
1176        if has_array_end {
1177            operands.pop(); // Remove the ArrayEnd
1178        }
1179
1180        let mut array = Vec::new();
1181        let mut found_start = false;
1182
1183        // Pop tokens until we find ArrayStart
1184        while let Some(token) = operands.pop() {
1185            match token {
1186                Token::ArrayStart => {
1187                    found_start = true;
1188                    break;
1189                }
1190                Token::ArrayEnd => {
1191                    // Skip any additional ArrayEnd tokens (shouldn't happen in well-formed PDFs)
1192                    continue;
1193                }
1194                _ => array.push(token),
1195            }
1196        }
1197
1198        if !found_start {
1199            return Err(ParseError::SyntaxError {
1200                position: self.position,
1201                message: "Expected array".to_string(),
1202            });
1203        }
1204
1205        array.reverse(); // We collected in reverse order
1206        Ok(array)
1207    }
1208
1209    fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
1210        if let Some(token) = operands.pop() {
1211            match token {
1212                Token::Name(name) => {
1213                    // Name token - this is a reference to properties in the resource dictionary
1214                    // For now, we'll store it as a special entry to indicate it's a resource reference
1215                    let mut props = HashMap::new();
1216                    props.insert("__resource_ref".to_string(), name);
1217                    Ok(props)
1218                }
1219                Token::DictStart => {
1220                    // Inline dictionary - parse key-value pairs
1221                    let mut props = HashMap::new();
1222
1223                    // Look for dictionary entries in remaining operands
1224                    while let Some(value_token) = operands.pop() {
1225                        if matches!(value_token, Token::DictEnd) {
1226                            break;
1227                        }
1228
1229                        // Expect key-value pairs
1230                        if let Token::Name(key) = value_token {
1231                            if let Some(value_token) = operands.pop() {
1232                                let value = match value_token {
1233                                    Token::Name(name) => name,
1234                                    Token::String(s) => String::from_utf8_lossy(&s).to_string(),
1235                                    Token::Integer(i) => i.to_string(),
1236                                    Token::Number(f) => f.to_string(),
1237                                    _ => continue, // Skip unsupported value types
1238                                };
1239                                props.insert(key, value);
1240                            }
1241                        }
1242                    }
1243
1244                    Ok(props)
1245                }
1246                _ => {
1247                    // Unexpected token type, treat as empty properties
1248                    Ok(HashMap::new())
1249                }
1250            }
1251        } else {
1252            // No operand available
1253            Err(ParseError::SyntaxError {
1254                position: 0,
1255                message: "Expected dictionary or name for marked content properties".to_string(),
1256            })
1257        }
1258    }
1259
1260    fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
1261        let mut components = Vec::new();
1262
1263        // Pop all numeric values from the stack
1264        while let Some(token) = operands.last() {
1265            match token {
1266                Token::Number(n) => {
1267                    components.push(*n);
1268                    operands.pop();
1269                }
1270                Token::Integer(i) => {
1271                    components.push(*i as f32);
1272                    operands.pop();
1273                }
1274                _ => break,
1275            }
1276        }
1277
1278        components.reverse();
1279        Ok(components)
1280    }
1281
1282    fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
1283        let mut elements = Vec::new();
1284
1285        for token in tokens {
1286            match token {
1287                Token::String(s) | Token::HexString(s) => {
1288                    elements.push(TextElement::Text(s));
1289                }
1290                Token::Number(n) => {
1291                    elements.push(TextElement::Spacing(n));
1292                }
1293                Token::Integer(i) => {
1294                    elements.push(TextElement::Spacing(i as f32));
1295                }
1296                _ => {
1297                    return Err(ParseError::SyntaxError {
1298                        position: self.position,
1299                        message: "Invalid element in text array".to_string(),
1300                    });
1301                }
1302            }
1303        }
1304
1305        Ok(elements)
1306    }
1307
1308    fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
1309        let mut pattern = Vec::new();
1310
1311        for token in tokens {
1312            match token {
1313                Token::Number(n) => pattern.push(n),
1314                Token::Integer(i) => pattern.push(i as f32),
1315                _ => {
1316                    return Err(ParseError::SyntaxError {
1317                        position: self.position,
1318                        message: "Invalid element in dash array".to_string(),
1319                    });
1320                }
1321            }
1322        }
1323
1324        Ok(pattern)
1325    }
1326
1327    fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
1328        // Parse inline image dictionary until we find ID
1329        let mut params = HashMap::new();
1330
1331        while self.position < self.tokens.len() {
1332            // Check if we've reached the ID operator
1333            if let Token::Operator(op) = &self.tokens[self.position] {
1334                if op == "ID" {
1335                    self.position += 1;
1336                    break;
1337                }
1338            }
1339
1340            // Parse key-value pairs for image parameters
1341            // Keys are abbreviated in inline images:
1342            // /W -> Width, /H -> Height, /CS -> ColorSpace, /BPC -> BitsPerComponent
1343            // /F -> Filter, /DP -> DecodeParms, /IM -> ImageMask, /I -> Interpolate
1344            if let Token::Name(key) = &self.tokens[self.position] {
1345                self.position += 1;
1346                if self.position >= self.tokens.len() {
1347                    break;
1348                }
1349
1350                // Parse the value
1351                let value = match &self.tokens[self.position] {
1352                    Token::Integer(n) => Object::Integer(*n as i64),
1353                    Token::Number(n) => Object::Real(*n as f64),
1354                    Token::Name(s) => Object::Name(expand_inline_name(s)),
1355                    Token::String(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1356                    Token::HexString(s) => Object::String(String::from_utf8_lossy(s).to_string()),
1357                    _ => Object::Null,
1358                };
1359
1360                // Expand abbreviated keys to full names
1361                let full_key = expand_inline_key(key);
1362                params.insert(full_key, value);
1363                self.position += 1;
1364            } else {
1365                self.position += 1;
1366            }
1367        }
1368
1369        // Now we should be at the image data
1370        // Collect bytes until we find EI
1371        let mut data = Vec::new();
1372
1373        // For inline images, we need to read raw bytes until EI
1374        // This is tricky because EI could appear in the image data
1375        // We need to look for EI followed by a whitespace or operator
1376
1377        // Simplified approach: collect all tokens until we find EI operator
1378        while self.position < self.tokens.len() {
1379            if let Token::Operator(op) = &self.tokens[self.position] {
1380                if op == "EI" {
1381                    self.position += 1;
1382                    break;
1383                }
1384            }
1385
1386            // Convert token to bytes (simplified - real implementation would need raw byte access)
1387            match &self.tokens[self.position] {
1388                Token::String(bytes) => data.extend_from_slice(bytes),
1389                Token::HexString(bytes) => data.extend_from_slice(bytes),
1390                Token::Integer(n) => data.extend_from_slice(n.to_string().as_bytes()),
1391                Token::Number(n) => data.extend_from_slice(n.to_string().as_bytes()),
1392                Token::Name(s) => data.extend_from_slice(s.as_bytes()),
1393                Token::Operator(s) if s != "EI" => data.extend_from_slice(s.as_bytes()),
1394                _ => {}
1395            }
1396            self.position += 1;
1397        }
1398
1399        Ok(ContentOperation::InlineImage { params, data })
1400    }
1401}
1402
/// Maps an abbreviated inline-image dictionary key to its full name.
///
/// Recognised abbreviations are `W`, `H`, `CS`, `BPC`, `F`, `DP`, `IM`, `I`
/// and `D`. Any other key (including keys that are already spelled out, such
/// as `ColorSpace` or `Intent`) is returned unchanged.
fn expand_inline_key(key: &str) -> String {
    let full_name = match key {
        "W" => "Width",
        "H" => "Height",
        "CS" => "ColorSpace",
        "BPC" => "BitsPerComponent",
        "F" => "Filter",
        "DP" => "DecodeParms",
        "IM" => "ImageMask",
        "I" => "Interpolate",
        "D" => "Decode",
        // Already a full name, or an unknown key: pass through untouched.
        other => other,
    };
    full_name.to_owned()
}
1419
/// Maps an abbreviated inline-image name value to its full name.
///
/// Covers the colour-space abbreviations (`G`, `RGB`, `CMYK`, `I`) and the
/// filter abbreviations (`AHx`, `A85`, `LZW`, `Fl`, `RL`, `DCT`, `CCF`).
/// Any other name is returned unchanged.
fn expand_inline_name(name: &str) -> String {
    let full_name = match name {
        // Colour spaces
        "G" => "DeviceGray",
        "RGB" => "DeviceRGB",
        "CMYK" => "DeviceCMYK",
        "I" => "Indexed",
        // Filters
        "AHx" => "ASCIIHexDecode",
        "A85" => "ASCII85Decode",
        "LZW" => "LZWDecode",
        "Fl" => "FlateDecode",
        "RL" => "RunLengthDecode",
        "DCT" => "DCTDecode",
        "CCF" => "CCITTFaxDecode",
        other => other,
    };
    full_name.to_owned()
}
1437
1438#[cfg(test)]
1439mod tests {
1440    use super::*;
1441
1442    #[test]
1443    fn test_tokenize_numbers() {
1444        let input = b"123 -45 3.14159 -0.5 .5";
1445        let mut tokenizer = ContentTokenizer::new(input);
1446
1447        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(123)));
1448        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(-45)));
1449        assert_eq!(
1450            tokenizer.next_token().unwrap(),
1451            Some(Token::Number(3.14159))
1452        );
1453        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
1454        assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
1455        assert_eq!(tokenizer.next_token().unwrap(), None);
1456    }
1457
1458    #[test]
1459    fn test_tokenize_strings() {
1460        let input = b"(Hello World) (Hello\\nWorld) (Nested (paren))";
1461        let mut tokenizer = ContentTokenizer::new(input);
1462
1463        assert_eq!(
1464            tokenizer.next_token().unwrap(),
1465            Some(Token::String(b"Hello World".to_vec()))
1466        );
1467        assert_eq!(
1468            tokenizer.next_token().unwrap(),
1469            Some(Token::String(b"Hello\nWorld".to_vec()))
1470        );
1471        assert_eq!(
1472            tokenizer.next_token().unwrap(),
1473            Some(Token::String(b"Nested (paren)".to_vec()))
1474        );
1475    }
1476
1477    #[test]
1478    fn test_tokenize_hex_strings() {
1479        let input = b"<48656C6C6F> <48 65 6C 6C 6F>";
1480        let mut tokenizer = ContentTokenizer::new(input);
1481
1482        assert_eq!(
1483            tokenizer.next_token().unwrap(),
1484            Some(Token::HexString(b"Hello".to_vec()))
1485        );
1486        assert_eq!(
1487            tokenizer.next_token().unwrap(),
1488            Some(Token::HexString(b"Hello".to_vec()))
1489        );
1490    }
1491
1492    #[test]
1493    fn test_tokenize_names() {
1494        let input = b"/Name /Name#20with#20spaces /A#42C";
1495        let mut tokenizer = ContentTokenizer::new(input);
1496
1497        assert_eq!(
1498            tokenizer.next_token().unwrap(),
1499            Some(Token::Name("Name".to_string()))
1500        );
1501        assert_eq!(
1502            tokenizer.next_token().unwrap(),
1503            Some(Token::Name("Name with spaces".to_string()))
1504        );
1505        assert_eq!(
1506            tokenizer.next_token().unwrap(),
1507            Some(Token::Name("ABC".to_string()))
1508        );
1509    }
1510
1511    #[test]
1512    fn test_tokenize_operators() {
1513        let input = b"BT Tj ET q Q";
1514        let mut tokenizer = ContentTokenizer::new(input);
1515
1516        assert_eq!(
1517            tokenizer.next_token().unwrap(),
1518            Some(Token::Operator("BT".to_string()))
1519        );
1520        assert_eq!(
1521            tokenizer.next_token().unwrap(),
1522            Some(Token::Operator("Tj".to_string()))
1523        );
1524        assert_eq!(
1525            tokenizer.next_token().unwrap(),
1526            Some(Token::Operator("ET".to_string()))
1527        );
1528        assert_eq!(
1529            tokenizer.next_token().unwrap(),
1530            Some(Token::Operator("q".to_string()))
1531        );
1532        assert_eq!(
1533            tokenizer.next_token().unwrap(),
1534            Some(Token::Operator("Q".to_string()))
1535        );
1536    }
1537
1538    #[test]
1539    fn test_parse_text_operators() {
1540        let content = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
1541        let operators = ContentParser::parse(content).unwrap();
1542
1543        assert_eq!(operators.len(), 5);
1544        assert_eq!(operators[0], ContentOperation::BeginText);
1545        assert_eq!(
1546            operators[1],
1547            ContentOperation::SetFont("F1".to_string(), 12.0)
1548        );
1549        assert_eq!(operators[2], ContentOperation::MoveText(100.0, 200.0));
1550        assert_eq!(
1551            operators[3],
1552            ContentOperation::ShowText(b"Hello World".to_vec())
1553        );
1554        assert_eq!(operators[4], ContentOperation::EndText);
1555    }
1556
1557    #[test]
1558    fn test_parse_graphics_operators() {
1559        let content = b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q";
1560        let operators = ContentParser::parse(content).unwrap();
1561
1562        assert_eq!(operators.len(), 6);
1563        assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1564        assert_eq!(
1565            operators[1],
1566            ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1567        );
1568        assert_eq!(operators[2], ContentOperation::SetLineWidth(2.0));
1569        assert_eq!(
1570            operators[3],
1571            ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0)
1572        );
1573        assert_eq!(operators[4], ContentOperation::Stroke);
1574        assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
1575    }
1576
1577    #[test]
1578    fn test_parse_color_operators() {
1579        let content = b"0.5 g 1 0 0 rg 0 0 0 1 k";
1580        let operators = ContentParser::parse(content).unwrap();
1581
1582        assert_eq!(operators.len(), 3);
1583        assert_eq!(operators[0], ContentOperation::SetNonStrokingGray(0.5));
1584        assert_eq!(
1585            operators[1],
1586            ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0)
1587        );
1588        assert_eq!(
1589            operators[2],
1590            ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0)
1591        );
1592    }
1593
1594    // Comprehensive tests for all ContentOperation variants
1595    mod comprehensive_tests {
1596        use super::*;
1597
1598        #[test]
1599        fn test_all_text_operators() {
1600            // Test basic text operators that work with current parser
1601            let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
1602            let operators = ContentParser::parse(content).unwrap();
1603
1604            assert_eq!(operators[0], ContentOperation::BeginText);
1605            assert_eq!(operators[1], ContentOperation::SetCharSpacing(5.0));
1606            assert_eq!(operators[2], ContentOperation::SetWordSpacing(10.0));
1607            assert_eq!(operators[3], ContentOperation::SetHorizontalScaling(120.0));
1608            assert_eq!(operators[4], ContentOperation::SetLeading(15.0));
1609            assert_eq!(
1610                operators[5],
1611                ContentOperation::SetFont("F1".to_string(), 12.0)
1612            );
1613            assert_eq!(operators[6], ContentOperation::SetTextRenderMode(1));
1614            assert_eq!(operators[7], ContentOperation::SetTextRise(5.0));
1615            assert_eq!(operators[8], ContentOperation::MoveText(100.0, 200.0));
1616            assert_eq!(
1617                operators[9],
1618                ContentOperation::MoveTextSetLeading(50.0, 150.0)
1619            );
1620            assert_eq!(operators[10], ContentOperation::NextLine);
1621            assert_eq!(operators[11], ContentOperation::ShowText(b"Hello".to_vec()));
1622            assert_eq!(operators[12], ContentOperation::EndText);
1623        }
1624
1625        #[test]
1626        fn test_all_graphics_state_operators() {
1627            // Test basic graphics state operators without arrays
1628            let content = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
1629            let operators = ContentParser::parse(content).unwrap();
1630
1631            assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
1632            assert_eq!(operators[1], ContentOperation::RestoreGraphicsState);
1633            assert_eq!(
1634                operators[2],
1635                ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
1636            );
1637            assert_eq!(operators[3], ContentOperation::SetLineWidth(2.0));
1638            assert_eq!(operators[4], ContentOperation::SetLineCap(1));
1639            assert_eq!(operators[5], ContentOperation::SetLineJoin(2));
1640            assert_eq!(operators[6], ContentOperation::SetMiterLimit(10.0));
1641            assert_eq!(
1642                operators[7],
1643                ContentOperation::SetGraphicsStateParams("GS1".to_string())
1644            );
1645            assert_eq!(operators[8], ContentOperation::SetFlatness(0.5));
1646            assert_eq!(
1647                operators[9],
1648                ContentOperation::SetIntent("Perceptual".to_string())
1649            );
1650        }
1651
/// Parses a single stream that uses `m`, `l`, `c`, `v`, `y`, `h` and `re`
/// and checks the operation produced for each operator, in order.
#[test]
fn test_all_path_construction_operators() {
    let stream = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0),
        ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0),
        ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0),
        ContentOperation::ClosePath,
        ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0),
    ];

    // Compared position by position; the total length is not checked.
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1677
/// Checks the operation produced for every path-painting and clipping
/// operator in the stream, including `F` mapping to the same value as `f`.
#[test]
fn test_all_path_painting_operators() {
    let stream = b"S s f F f* B B* b b* n W W*";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::Stroke,
        ContentOperation::CloseStroke,
        ContentOperation::Fill,
        ContentOperation::Fill, // F is alias for f
        ContentOperation::FillEvenOdd,
        ContentOperation::FillStroke,
        ContentOperation::FillStrokeEvenOdd,
        ContentOperation::CloseFillStroke,
        ContentOperation::CloseFillStrokeEvenOdd,
        ContentOperation::EndPath,
        ContentOperation::Clip,
        ContentOperation::ClipEvenOdd,
    ];

    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1696
/// Covers the colour-space, gray, RGB, CMYK and shading operators that take
/// plain name or numeric operands.
#[test]
fn test_all_color_operators() {
    let stream = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string()),
        ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string()),
        ContentOperation::SetStrokingGray(0.7),
        ContentOperation::SetNonStrokingGray(0.4),
        ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0),
        ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0),
        ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0),
        ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5),
        ContentOperation::ShadingFill("Shade1".to_string()),
    ];

    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1734
/// Checks `Do`, `BMC`, `EMC`, `MP`, `BX` and `EX` in one stream.
#[test]
fn test_xobject_and_marked_content_operators() {
    let stream = b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::PaintXObject("Image1".to_string()),
        ContentOperation::BeginMarkedContent("MC1".to_string()),
        ContentOperation::EndMarkedContent,
        ContentOperation::DefineMarkedContentPoint("MP1".to_string()),
        ContentOperation::BeginCompatibility,
        ContentOperation::EndCompatibility,
    ];

    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1757
/// A graphics-state save/restore wrapping a transform and a small text
/// object must yield exactly eight operations in stream order.
#[test]
fn test_complex_content_stream() {
    let stream = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0),
        ContentOperation::BeginText,
        ContentOperation::SetFont("F1".to_string(), 12.0),
        ContentOperation::MoveText(0.0, 0.0),
        ContentOperation::ShowText(b"Complex".to_vec()),
        ContentOperation::EndText,
        ContentOperation::RestoreGraphicsState,
    ];

    assert_eq!(parsed.len(), 8);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1782
/// Runs of spaces, tabs, CR and LF between (and around) tokens must be
/// skipped, and the stream must end with `None`.
#[test]
fn test_tokenizer_whitespace_handling() {
    let raw = b"  \t\n\r  BT  \t\n  /F1   12.5  \t Tf  \n\r  ET  ";
    let mut tokenizer = ContentTokenizer::new(raw);

    let expected = vec![
        Token::Operator("BT".to_string()),
        Token::Name("F1".to_string()),
        Token::Number(12.5),
        Token::Operator("Tf".to_string()),
        Token::Operator("ET".to_string()),
    ];
    for want in expected {
        assert_eq!(tokenizer.next_token().unwrap(), Some(want));
    }

    // Trailing whitespace must not produce a further token.
    assert_eq!(tokenizer.next_token().unwrap(), None);
}
1807
/// Number spellings with leading/trailing dots and explicit signs.
#[test]
fn test_tokenizer_edge_cases() {
    let raw = b"0 .5 -.5 +.5 123. .123 1.23 -1.23";
    let mut tokenizer = ContentTokenizer::new(raw);

    let expected = vec![
        Token::Integer(0),
        Token::Number(0.5),
        Token::Number(-0.5),
        Token::Number(0.5),
        Token::Number(123.0),
        Token::Number(0.123),
        Token::Number(1.23),
        Token::Number(-1.23),
    ];
    for want in expected {
        assert_eq!(tokenizer.next_token().unwrap(), Some(want));
    }
}
1823
/// Literal strings with backslash escapes: `\\`, `\)`, `\(`, `\n`, `\t`,
/// `\r`, `\b` and `\f`.
#[test]
fn test_string_parsing_edge_cases() {
    let raw = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
    let mut tokenizer = ContentTokenizer::new(raw);

    // Only the first ten literals are checked; the two trailing "octal"
    // literals in the input are never read by this test.
    let expected: [&[u8]; 10] = [
        b"Simple",
        b"With\\backslash",
        b"With)paren",
        b"With\newline",
        b"With\ttab",
        b"With\rcarriage",
        b"With\x08backspace",
        b"With\x0Cformfeed",
        b"With(leftparen",
        b"With)rightparen",
    ];
    for want in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::String(want.to_vec()))
        );
    }
}
1870
/// Hex strings: compact, whitespace-separated, and with an odd digit count
/// (the expected bytes show the final nibble padded with zero).
#[test]
fn test_hex_string_parsing() {
    let raw = b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>";
    let mut tokenizer = ContentTokenizer::new(raw);

    let expected: [&[u8]; 4] = [b"Hello", b"Hello", b"HelloW", b"Hello\x50"];
    for want in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::HexString(want.to_vec()))
        );
    }
}
1893
/// Names containing `#xx` escapes must come back with the escapes decoded.
#[test]
fn test_name_parsing_edge_cases() {
    let raw = b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
    let mut tokenizer = ContentTokenizer::new(raw);

    let expected = [
        "Name",
        "Name with spaces",
        "Name#with#hash",
        "Name/with/slash",
        "EmptyName",
    ];
    for want in expected.iter() {
        assert_eq!(
            tokenizer.next_token().unwrap(),
            Some(Token::Name(want.to_string()))
        );
    }
}
1920
/// Repeated and unbalanced-looking operators are parsed one-to-one; the
/// test expects no nesting validation to reject `BT BT ET ET`.
#[test]
fn test_operator_parsing_edge_cases() {
    let stream = b"q q q Q Q Q BT BT ET ET";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::BeginText,
        ContentOperation::BeginText,
        ContentOperation::EndText,
        ContentOperation::EndText,
    ];

    assert_eq!(parsed.len(), 10);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
1938
/// `Td` given a single operand (the y coordinate is missing) must be
/// reported as an error.
#[test]
fn test_error_handling_insufficient_operands() {
    let stream = b"100 Td";
    assert!(ContentParser::parse(stream).is_err());
}
1945
/// An operator keyword the parser does not know must be reported as an error.
#[test]
fn test_error_handling_invalid_operator() {
    let stream = b"100 200 INVALID";
    assert!(ContentParser::parse(stream).is_err());
}
1952
/// Feeds the tokenizer a literal string that is missing its closing
/// parenthesis.
///
/// Whether this yields an error or a best-effort token is not pinned down
/// here; the only requirement is that tokenizing returns instead of
/// panicking.
#[test]
fn test_error_handling_malformed_string() {
    let input = b"(Unclosed string";
    let mut tokenizer = ContentTokenizer::new(input);

    // Either outcome is accepted, so the result is deliberately discarded
    // rather than wrapped in an assertion that could never fail.
    let _outcome = tokenizer.next_token();
}
1963
/// A hex string containing a non-hex digit (`G`) must be rejected.
#[test]
fn test_error_handling_malformed_hex_string() {
    let mut tokenizer = ContentTokenizer::new(b"<48656C6C6G>");
    assert!(tokenizer.next_token().is_err());
}
1971
/// A name whose `#` escape is followed by non-hex digits must be rejected.
#[test]
fn test_error_handling_malformed_name() {
    let mut tokenizer = ContentTokenizer::new(b"/Name#GG");
    assert!(tokenizer.next_token().is_err());
}
1979
/// An empty stream parses successfully and yields no operations.
#[test]
fn test_empty_content_stream() {
    let parsed = ContentParser::parse(b"").unwrap();
    assert!(parsed.is_empty());
}
1986
/// A stream made only of whitespace parses successfully and yields no
/// operations.
#[test]
fn test_whitespace_only_content_stream() {
    let parsed = ContentParser::parse(b"   \t\n\r   ").unwrap();
    assert!(parsed.is_empty());
}
1993
/// Integer operands are delivered to `MoveTo`/`LineTo` as floating-point
/// coordinates.
#[test]
fn test_mixed_integer_and_real_operands() {
    let stream = b"100 200 m 150 200 l";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
    ];

    assert_eq!(parsed.len(), 2);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
2004
/// Negative integer and real operands keep their sign through `Td` and `TD`.
#[test]
fn test_negative_operands() {
    let stream = b"-100 -200 Td -50.5 -75.2 TD";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::MoveText(-100.0, -200.0),
        ContentOperation::MoveTextSetLeading(-50.5, -75.2),
    ];

    assert_eq!(parsed.len(), 2);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
2017
/// Reals with six integer and six fractional digits round-trip to the same
/// value the equivalent Rust literal produces.
#[test]
fn test_large_numbers() {
    let parsed = ContentParser::parse(b"999999.999999 -999999.999999 m").unwrap();

    let want = ContentOperation::MoveTo(999999.999999, -999999.999999);
    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0], want);
}
2029
/// Despite its name this only exercises plain decimal reals; exponent
/// notation is not used in the input.
#[test]
fn test_scientific_notation() {
    let parsed = ContentParser::parse(b"123.45 -456.78 m").unwrap();

    let want = ContentOperation::MoveTo(123.45, -456.78);
    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0], want);
}
2039
/// `TJ` applied to a bare string instead of an array is expected to fail.
#[test]
fn test_show_text_array_complex() {
    let stream = b"(Hello) TJ";
    let outcome = ContentParser::parse(stream);
    assert!(outcome.is_err());
}
2048
/// `d` given only the phase number, with no dash array, is expected to fail.
#[test]
fn test_dash_pattern_empty() {
    let stream = b"0 d";
    let outcome = ContentParser::parse(stream);
    assert!(outcome.is_err());
}
2057
/// `d` given a single real operand and no dash array is expected to fail.
#[test]
fn test_dash_pattern_complex() {
    let stream = b"2.5 d";
    let outcome = ContentParser::parse(stream);
    assert!(outcome.is_err());
}
2066
/// `pop_array` must consume the whole array from the operand stack, both
/// when the closing `ArrayEnd` token is present and when it is absent.
#[test]
fn test_pop_array_removes_array_end() {
    let parser = ContentParser::new(b"");

    // Well-formed array: [1 2 3]
    let mut closed = vec![
        Token::ArrayStart,
        Token::Integer(1),
        Token::Integer(2),
        Token::Integer(3),
        Token::ArrayEnd,
    ];
    let items = parser.pop_array(&mut closed).unwrap();
    assert_eq!(items.len(), 3);
    assert!(closed.is_empty());

    // No trailing ArrayEnd (kept working for backwards compatibility)
    let mut unclosed = vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)];
    let items = parser.pop_array(&mut unclosed).unwrap();
    assert_eq!(items.len(), 2);
    assert!(unclosed.is_empty());
}
2090
/// `parse_dash_array` converts numeric tokens (real or integer) to `f32`
/// values and accepts an empty token list.
#[test]
fn test_dash_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    // Mixed real and integer tokens
    let dashes = parser
        .parse_dash_array(vec![Token::Number(3.0), Token::Integer(2)])
        .unwrap();
    assert_eq!(dashes, vec![3.0, 2.0]);

    // Empty dash array
    let dashes = parser.parse_dash_array(Vec::new()).unwrap();
    assert_eq!(dashes, Vec::<f32>::new());
}
2107
/// `parse_text_array` keeps one element per input token when given strings
/// interleaved with a numeric adjustment.
#[test]
fn test_text_array_parsing_valid() {
    let parser = ContentParser::new(b"");

    let tokens = vec![
        Token::String(b"Hello".to_vec()),
        Token::Number(-100.0),
        Token::String(b"World".to_vec()),
    ];
    let elements = parser.parse_text_array(tokens).unwrap();
    assert_eq!(elements.len(), 3);
}
2122
/// A `BI … ID … EI` sequence becomes a single `InlineImage` operation whose
/// abbreviated keys and values (`W`, `H`, `BPC`, `CS`, `RGB`) are exposed
/// under their full names.
#[test]
fn test_inline_image_handling() {
    let stream = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
    let parsed = ContentParser::parse(stream).unwrap();

    assert_eq!(parsed.len(), 1);
    if let ContentOperation::InlineImage { params, .. } = &parsed[0] {
        // The image data itself is not inspected, only the parameters.
        let expected = [
            ("Width", Object::Integer(100)),
            ("Height", Object::Integer(100)),
            ("BitsPerComponent", Object::Integer(8)),
            ("ColorSpace", Object::Name("DeviceRGB".to_string())),
        ];
        for (key, want) in expected.iter() {
            assert_eq!(params.get(*key), Some(want));
        }
    } else {
        panic!("Expected InlineImage operation");
    }
}
2144
/// Same as the basic inline-image test, additionally checking that the
/// abbreviated filter `/AHx` and colour space `/G` are expanded.
#[test]
fn test_inline_image_with_filter() {
    let stream = b"BI /W 50 /H 50 /CS /G /BPC 1 /F /AHx ID 00FF00FF EI";
    let parsed = ContentParser::parse(stream).unwrap();

    assert_eq!(parsed.len(), 1);
    if let ContentOperation::InlineImage { params, .. } = &parsed[0] {
        let expected = [
            ("Width", Object::Integer(50)),
            ("Height", Object::Integer(50)),
            ("ColorSpace", Object::Name("DeviceGray".to_string())),
            ("BitsPerComponent", Object::Integer(1)),
            ("Filter", Object::Name("ASCIIHexDecode".to_string())),
        ];
        for (key, want) in expected.iter() {
            assert_eq!(params.get(*key), Some(want));
        }
    } else {
        panic!("Expected InlineImage operation");
    }
}
2168
/// Parses 1000 `m` operations and checks both the count and a wall-clock
/// budget of 100 ms.
#[test]
fn test_content_parser_performance() {
    let stream: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} m ", i, i + 1).into_bytes())
        .collect();

    // Only the parse call itself is timed, not the input construction.
    let timer = std::time::Instant::now();
    let parsed = ContentParser::parse(&stream).unwrap();
    let elapsed = timer.elapsed();

    assert_eq!(parsed.len(), 1000);
    assert!(elapsed.as_millis() < 100); // Should parse 1000 operators in under 100ms
}
2183
/// Tokenizes 1000 pairs of integers and checks both the token count and a
/// wall-clock budget of 50 ms.
#[test]
fn test_tokenizer_performance() {
    let raw: Vec<u8> = (0..1000)
        .flat_map(|i| format!("{} {} ", i, i + 1).into_bytes())
        .collect();

    let timer = std::time::Instant::now();
    let mut tokenizer = ContentTokenizer::new(&raw);
    // Pull tokens until the tokenizer reports end of input.
    let seen = std::iter::from_fn(|| tokenizer.next_token().unwrap()).count();
    let elapsed = timer.elapsed();

    assert_eq!(seen, 2000); // 1000 pairs of numbers
    assert!(elapsed.as_millis() < 50); // Should tokenize 2000 tokens in under 50ms
}
2202
/// Parses a stream of 10 000 `c` operators and verifies that every parsed
/// operation is a `CurveTo`.
#[test]
fn test_memory_usage_large_content() {
    let mut content = Vec::new();
    for i in 0..10000 {
        content.extend_from_slice(
            format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5)
                .as_bytes(),
        );
    }

    let operators = ContentParser::parse(&content).unwrap();
    assert_eq!(operators.len(), 10000);

    // `matches!` only evaluates to a bool; it must be asserted, otherwise a
    // wrong variant would go unnoticed.
    for (index, op) in operators.iter().enumerate() {
        assert!(
            matches!(op, ContentOperation::CurveTo(..)),
            "operation {} is not CurveTo: {:?}",
            index,
            op
        );
    }
}
2221
/// Parses the same shared byte buffer from ten threads and checks that each
/// thread gets the same five-operation result.
#[test]
fn test_concurrent_parsing() {
    use std::sync::Arc;
    use std::thread;

    let shared = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());

    // Start every worker before joining any of them.
    let mut workers = Vec::with_capacity(10);
    for _ in 0..10 {
        let stream = shared.clone();
        workers.push(thread::spawn(move || {
            ContentParser::parse(&stream).unwrap()
        }));
    }

    for worker in workers {
        let parsed = worker.join().unwrap();
        assert_eq!(parsed.len(), 5);
        assert_eq!(parsed[0], ContentOperation::BeginText);
        assert_eq!(parsed[4], ContentOperation::EndText);
    }
}
2242
2243        // ========== NEW COMPREHENSIVE TESTS ==========
2244
/// Hex-string corner cases: empty `<>`, an odd digit count, and embedded
/// whitespace.
#[test]
fn test_tokenizer_hex_string_edge_cases() {
    // Empty hex string
    let first = ContentTokenizer::new(b"<>").next_token().unwrap().unwrap();
    if let Token::HexString(bytes) = first {
        assert!(bytes.is_empty());
    } else {
        panic!("Expected empty hex string");
    }

    // Odd number of hex digits
    let second = ContentTokenizer::new(b"<123>")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::HexString(bytes) = second {
        assert_eq!(bytes, vec![0x12, 0x30]);
    } else {
        panic!("Expected hex string with odd digits");
    }

    // Hex string with whitespace
    let third = ContentTokenizer::new(b"<12 34\t56\n78>")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::HexString(bytes) = third {
        assert_eq!(bytes, vec![0x12, 0x34, 0x56, 0x78]);
    } else {
        panic!("Expected hex string with whitespace");
    }
}
2270
/// Single-character and three-digit octal escapes inside literal strings
/// decode to the corresponding bytes.
#[test]
fn test_tokenizer_literal_string_escape_sequences() {
    // All standard single-character escapes in one string
    let escaped = ContentTokenizer::new(b"(\\n\\r\\t\\b\\f\\(\\)\\\\)")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::String(bytes) = escaped {
        assert_eq!(
            bytes,
            vec![b'\n', b'\r', b'\t', 0x08, 0x0C, b'(', b')', b'\\']
        );
    } else {
        panic!("Expected string with escapes");
    }

    // Octal escapes
    let octal = ContentTokenizer::new(b"(\\101\\040\\377)")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::String(bytes) = octal {
        assert_eq!(bytes, vec![b'A', b' ', 255]);
    } else {
        panic!("Expected string with octal escapes");
    }
}
2294
/// Balanced, unescaped parentheses inside a literal string are kept as part
/// of the string, at one and at several nesting levels.
#[test]
fn test_tokenizer_nested_parentheses() {
    // One level of nesting
    let single = ContentTokenizer::new(b"(outer (inner) text)")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::String(bytes) = single {
        assert_eq!(bytes, b"outer (inner) text");
    } else {
        panic!("Expected string with nested parentheses");
    }

    // Multiple levels of nesting
    let deep = ContentTokenizer::new(b"(level1 (level2 (level3) back2) back1)")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::String(bytes) = deep {
        assert_eq!(bytes, b"level1 (level2 (level3) back2) back1");
    } else {
        panic!("Expected string with deep nesting");
    }
}
2316
/// `#xx` escapes in names decode to spaces and to delimiter characters.
#[test]
fn test_tokenizer_name_hex_escapes() {
    // Escaped spaces
    let spaced = ContentTokenizer::new(b"/Name#20With#20Spaces")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::Name(text) = spaced {
        assert_eq!(text, "Name With Spaces");
    } else {
        panic!("Expected name with hex escapes");
    }

    // Escaped '/', '(', ')', '<' and '>'
    let special = ContentTokenizer::new(b"/Special#2F#28#29#3C#3E")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::Name(text) = special {
        assert_eq!(text, "Special/()<>");
    } else {
        panic!("Expected name with special character escapes");
    }
}
2334
/// Numeric corner cases: the largest 32-bit signed integer, a very small
/// real, and a real written without a leading zero.
#[test]
fn test_tokenizer_number_edge_cases() {
    // Very large integer
    let large = ContentTokenizer::new(b"2147483647")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::Integer(value) = large {
        assert_eq!(value, 2147483647);
    } else {
        panic!("Expected large integer");
    }

    // Very small real
    let tiny = ContentTokenizer::new(b"0.00001")
        .next_token()
        .unwrap()
        .unwrap();
    if let Token::Number(value) = tiny {
        assert!((value - 0.00001).abs() < f32::EPSILON);
    } else {
        panic!("Expected small float");
    }

    // Real starting with a dot
    let dotted = ContentTokenizer::new(b".5").next_token().unwrap().unwrap();
    if let Token::Number(value) = dotted {
        assert!((value - 0.5).abs() < f32::EPSILON);
    } else {
        panic!("Expected float starting with dot");
    }
}
2361
/// A closed, filled four-point path yields exactly six operations.
#[test]
fn test_parser_complex_path_operations() {
    let stream = b"100 200 m 150 200 l 150 250 l 100 250 l h f";
    let parsed = ContentParser::parse(stream).unwrap();

    let expected = [
        ContentOperation::MoveTo(100.0, 200.0),
        ContentOperation::LineTo(150.0, 200.0),
        ContentOperation::LineTo(150.0, 250.0),
        ContentOperation::LineTo(100.0, 250.0),
        ContentOperation::ClosePath,
        ContentOperation::Fill,
    ];

    assert_eq!(parsed.len(), 6);
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(&parsed[idx], want);
    }
}
2375
/// A single `c` operator must produce one `CurveTo` whose six fields carry
/// the operands in the order they appear in the stream.
#[test]
fn test_parser_bezier_curves() {
    let content = b"100 100 150 50 200 150 c";
    let operators = ContentParser::parse(content).unwrap();

    assert_eq!(operators.len(), 1);
    // Exact comparison so that swapped or reversed operands are caught:
    // (x1, y1) = (100, 100), (x2, y2) = (150, 50), (x3, y3) = (200, 150).
    assert_eq!(
        operators[0],
        ContentOperation::CurveTo(100.0, 100.0, 150.0, 50.0, 200.0, 150.0)
    );
}
2397
2398        #[test]
2399        fn test_parser_color_operations() {
2400            let content = b"0.5 g 1 0 0 rg 0 1 0 1 k /DeviceRGB cs 0.2 0.4 0.6 sc";
2401            let operators = ContentParser::parse(content).unwrap();
2402
2403            assert_eq!(operators.len(), 5);
2404            match &operators[0] {
2405                ContentOperation::SetNonStrokingGray(gray) => assert_eq!(*gray, 0.5),
2406                _ => panic!("Expected SetNonStrokingGray"),
2407            }
2408            match &operators[1] {
2409                ContentOperation::SetNonStrokingRGB(r, g, b) => {
2410                    assert_eq!((*r, *g, *b), (1.0, 0.0, 0.0));
2411                }
2412                _ => panic!("Expected SetNonStrokingRGB"),
2413            }
2414        }
2415
2416        #[test]
2417        fn test_parser_text_positioning_advanced() {
2418            let content = b"BT 1 0 0 1 100 200 Tm 0 TL 10 TL (Line 1) ' (Line 2) ' ET";
2419            let operators = ContentParser::parse(content).unwrap();
2420
2421            assert_eq!(operators.len(), 7);
2422            assert_eq!(operators[0], ContentOperation::BeginText);
2423            match &operators[1] {
2424                ContentOperation::SetTextMatrix(a, b, c, d, e, f) => {
2425                    assert_eq!((*a, *b, *c, *d, *e, *f), (1.0, 0.0, 0.0, 1.0, 100.0, 200.0));
2426                }
2427                _ => panic!("Expected SetTextMatrix"),
2428            }
2429            assert_eq!(operators[6], ContentOperation::EndText);
2430        }
2431
2432        #[test]
2433        fn test_parser_graphics_state_operations() {
2434            let content = b"q 2 0 0 2 100 100 cm 5 w 1 J 2 j 10 M Q";
2435            let operators = ContentParser::parse(content).unwrap();
2436
2437            assert_eq!(operators.len(), 7);
2438            assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
2439            match &operators[1] {
2440                ContentOperation::SetTransformMatrix(a, b, c, d, e, f) => {
2441                    assert_eq!((*a, *b, *c, *d, *e, *f), (2.0, 0.0, 0.0, 2.0, 100.0, 100.0));
2442                }
2443                _ => panic!("Expected SetTransformMatrix"),
2444            }
2445            assert_eq!(operators[6], ContentOperation::RestoreGraphicsState);
2446        }
2447
2448        #[test]
2449        fn test_parser_xobject_operations() {
2450            let content = b"/Image1 Do /Form2 Do /Pattern3 Do";
2451            let operators = ContentParser::parse(content).unwrap();
2452
2453            assert_eq!(operators.len(), 3);
2454            for (i, expected_name) in ["Image1", "Form2", "Pattern3"].iter().enumerate() {
2455                match &operators[i] {
2456                    ContentOperation::PaintXObject(name) => assert_eq!(name, expected_name),
2457                    _ => panic!("Expected PaintXObject"),
2458                }
2459            }
2460        }
2461
2462        #[test]
2463        fn test_parser_marked_content_operations() {
2464            let content = b"/P BMC (Tagged content) Tj EMC";
2465            let operators = ContentParser::parse(content).unwrap();
2466
2467            assert_eq!(operators.len(), 3);
2468            match &operators[0] {
2469                ContentOperation::BeginMarkedContent(tag) => assert_eq!(tag, "P"),
2470                _ => panic!("Expected BeginMarkedContent"),
2471            }
2472            assert_eq!(operators[2], ContentOperation::EndMarkedContent);
2473        }
2474
2475        #[test]
2476        fn test_parser_error_handling_invalid_operators() {
2477            // Missing operands for move operator
2478            let content = b"m";
2479            let result = ContentParser::parse(content);
2480            assert!(result.is_err());
2481
2482            // Invalid hex string (no closing >)
2483            let content = b"<ABC DEF BT";
2484            let result = ContentParser::parse(content);
2485            assert!(result.is_err());
2486
2487            // Test that we can detect actual parsing errors
2488            let content = b"100 200 300"; // Numbers without operator should parse ok
2489            let result = ContentParser::parse(content);
2490            assert!(result.is_ok()); // This should actually be ok since no operator is attempted
2491        }
2492
2493        #[test]
2494        fn test_parser_whitespace_tolerance() {
2495            let content = b"  \n\t  100   \r\n  200  \t m  \n";
2496            let operators = ContentParser::parse(content).unwrap();
2497
2498            assert_eq!(operators.len(), 1);
2499            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2500        }
2501
2502        #[test]
2503        fn test_tokenizer_comment_handling() {
2504            let content = b"100 % This is a comment\n200 m % Another comment";
2505            let operators = ContentParser::parse(content).unwrap();
2506
2507            assert_eq!(operators.len(), 1);
2508            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2509        }
2510
2511        #[test]
2512        fn test_parser_stream_with_binary_data() {
2513            // Test content stream with comment containing binary-like data
2514            let content = b"100 200 m % Comment with \xFF binary\n150 250 l";
2515
2516            let operators = ContentParser::parse(content).unwrap();
2517            assert_eq!(operators.len(), 2);
2518            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2519            assert_eq!(operators[1], ContentOperation::LineTo(150.0, 250.0));
2520        }
2521
2522        #[test]
2523        fn test_tokenizer_array_parsing() {
2524            // Test simple operations that don't require complex array parsing
2525            let content = b"100 200 m 150 250 l";
2526            let operators = ContentParser::parse(content).unwrap();
2527
2528            assert_eq!(operators.len(), 2);
2529            assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
2530            assert_eq!(operators[1], ContentOperation::LineTo(150.0, 250.0));
2531        }
2532
2533        #[test]
2534        fn test_parser_rectangle_operations() {
2535            let content = b"10 20 100 50 re 0 0 200 300 re";
2536            let operators = ContentParser::parse(content).unwrap();
2537
2538            assert_eq!(operators.len(), 2);
2539            match &operators[0] {
2540                ContentOperation::Rectangle(x, y, width, height) => {
2541                    assert_eq!((*x, *y, *width, *height), (10.0, 20.0, 100.0, 50.0));
2542                }
2543                _ => panic!("Expected Rectangle operation"),
2544            }
2545            match &operators[1] {
2546                ContentOperation::Rectangle(x, y, width, height) => {
2547                    assert_eq!((*x, *y, *width, *height), (0.0, 0.0, 200.0, 300.0));
2548                }
2549                _ => panic!("Expected Rectangle operation"),
2550            }
2551        }
2552
2553        #[test]
2554        fn test_parser_clipping_operations() {
2555            let content = b"100 100 50 50 re W n 200 200 75 75 re W* n";
2556            let operators = ContentParser::parse(content).unwrap();
2557
2558            assert_eq!(operators.len(), 6);
2559            assert_eq!(operators[1], ContentOperation::Clip);
2560            assert_eq!(operators[2], ContentOperation::EndPath);
2561            assert_eq!(operators[4], ContentOperation::ClipEvenOdd);
2562            assert_eq!(operators[5], ContentOperation::EndPath);
2563        }
2564
2565        #[test]
2566        fn test_parser_painting_operations() {
2567            let content = b"S s f f* B B* b b*";
2568            let operators = ContentParser::parse(content).unwrap();
2569
2570            assert_eq!(operators.len(), 8);
2571            assert_eq!(operators[0], ContentOperation::Stroke);
2572            assert_eq!(operators[1], ContentOperation::CloseStroke);
2573            assert_eq!(operators[2], ContentOperation::Fill);
2574            assert_eq!(operators[3], ContentOperation::FillEvenOdd);
2575            assert_eq!(operators[4], ContentOperation::FillStroke);
2576            assert_eq!(operators[5], ContentOperation::FillStrokeEvenOdd);
2577            assert_eq!(operators[6], ContentOperation::CloseFillStroke);
2578            assert_eq!(operators[7], ContentOperation::CloseFillStrokeEvenOdd);
2579        }
2580
2581        #[test]
2582        fn test_parser_line_style_operations() {
2583            let content = b"5 w 1 J 2 j 10 M [ 3 2 ] 0 d";
2584            let operators = ContentParser::parse(content).unwrap();
2585
2586            assert_eq!(operators.len(), 5);
2587            assert_eq!(operators[0], ContentOperation::SetLineWidth(5.0));
2588            assert_eq!(operators[1], ContentOperation::SetLineCap(1));
2589            assert_eq!(operators[2], ContentOperation::SetLineJoin(2));
2590            assert_eq!(operators[3], ContentOperation::SetMiterLimit(10.0));
2591            // Dash pattern test would need array support
2592        }
2593
2594        #[test]
2595        fn test_parser_text_state_operations() {
2596            let content = b"12 Tc 3 Tw 100 Tz 1 Tr 2 Ts";
2597            let operators = ContentParser::parse(content).unwrap();
2598
2599            assert_eq!(operators.len(), 5);
2600            assert_eq!(operators[0], ContentOperation::SetCharSpacing(12.0));
2601            assert_eq!(operators[1], ContentOperation::SetWordSpacing(3.0));
2602            assert_eq!(operators[2], ContentOperation::SetHorizontalScaling(100.0));
2603            assert_eq!(operators[3], ContentOperation::SetTextRenderMode(1));
2604            assert_eq!(operators[4], ContentOperation::SetTextRise(2.0));
2605        }
2606
2607        #[test]
2608        fn test_parser_unicode_text() {
2609            let content = b"BT (Hello \xC2\xA9 World \xE2\x9C\x93) Tj ET";
2610            let operators = ContentParser::parse(content).unwrap();
2611
2612            assert_eq!(operators.len(), 3);
2613            assert_eq!(operators[0], ContentOperation::BeginText);
2614            match &operators[1] {
2615                ContentOperation::ShowText(text) => {
2616                    assert!(text.len() > 5); // Should contain Unicode bytes
2617                }
2618                _ => panic!("Expected ShowText operation"),
2619            }
2620            assert_eq!(operators[2], ContentOperation::EndText);
2621        }
2622
2623        #[test]
2624        fn test_parser_stress_test_large_coordinates() {
2625            let content = b"999999.999 -999999.999 999999.999 -999999.999 999999.999 -999999.999 c";
2626            let operators = ContentParser::parse(content).unwrap();
2627
2628            assert_eq!(operators.len(), 1);
2629            match &operators[0] {
2630                ContentOperation::CurveTo(_x1, _y1, _x2, _y2, _x3, _y3) => {
2631                    assert!((*_x1 - 999999.999).abs() < 0.1);
2632                    assert!((*_y1 - (-999999.999)).abs() < 0.1);
2633                    assert!((*_x3 - 999999.999).abs() < 0.1);
2634                }
2635                _ => panic!("Expected CurveTo operation"),
2636            }
2637        }
2638
2639        #[test]
2640        fn test_parser_empty_content_stream() {
2641            let content = b"";
2642            let operators = ContentParser::parse(content).unwrap();
2643            assert!(operators.is_empty());
2644
2645            let content = b"   \n\t\r   ";
2646            let operators = ContentParser::parse(content).unwrap();
2647            assert!(operators.is_empty());
2648        }
2649
2650        #[test]
2651        fn test_tokenizer_error_recovery() {
2652            // Test that parser can handle malformed but recoverable content
2653            let content = b"100 200 m % Comment with\xFFbinary\n150 250 l";
2654            let result = ContentParser::parse(content);
2655            // Should either parse successfully or fail gracefully
2656            assert!(result.is_ok() || result.is_err());
2657        }
2658
2659        #[test]
2660        fn test_parser_optimization_repeated_operations() {
2661            // Test performance with many repeated operations
2662            let mut content = Vec::new();
2663            for i in 0..1000 {
2664                content.extend_from_slice(format!("{} {} m ", i, i * 2).as_bytes());
2665            }
2666
2667            let start = std::time::Instant::now();
2668            let operators = ContentParser::parse(&content).unwrap();
2669            let duration = start.elapsed();
2670
2671            assert_eq!(operators.len(), 1000);
2672            assert!(duration.as_millis() < 200); // Should be fast
2673        }
2674
2675        #[test]
2676        fn test_parser_memory_efficiency_large_strings() {
2677            // Test with large text content
2678            let large_text = "A".repeat(10000);
2679            let content = format!("BT ({}) Tj ET", large_text);
2680            let operators = ContentParser::parse(content.as_bytes()).unwrap();
2681
2682            assert_eq!(operators.len(), 3);
2683            match &operators[1] {
2684                ContentOperation::ShowText(text) => {
2685                    assert_eq!(text.len(), 10000);
2686                }
2687                _ => panic!("Expected ShowText operation"),
2688            }
2689        }
2690    }
2691
2692    #[test]
2693    fn test_content_stream_too_large() {
2694        // Test handling of very large content streams (covering potential size limits)
2695        let mut large_content = Vec::new();
2696
2697        // Create a content stream with many operations
2698        for i in 0..10000 {
2699            large_content.extend_from_slice(format!("{} {} m ", i, i).as_bytes());
2700        }
2701        large_content.extend_from_slice(b"S");
2702
2703        // Should handle large content without panic
2704        let result = ContentParser::parse_content(&large_content);
2705        assert!(result.is_ok());
2706
2707        let operations = result.unwrap();
2708        // Should have many MoveTo operations plus one Stroke
2709        assert!(operations.len() > 10000);
2710    }
2711
2712    #[test]
2713    fn test_invalid_operator_handling() {
2714        // Test parsing with invalid operators
2715        let content = b"100 200 INVALID_OP 300 400 m";
2716        let result = ContentParser::parse_content(content);
2717
2718        // Should either handle gracefully or return error
2719        if let Ok(operations) = result {
2720            // If it succeeds, should have at least the valid MoveTo
2721            assert!(operations
2722                .iter()
2723                .any(|op| matches!(op, ContentOperation::MoveTo(_, _))));
2724        }
2725    }
2726
2727    #[test]
2728    fn test_nested_arrays_malformed() {
2729        // Test malformed nested arrays in TJ operator
2730        let content = b"[[(Hello] [World)]] TJ";
2731        let result = ContentParser::parse_content(content);
2732
2733        // Should handle malformed arrays gracefully
2734        assert!(result.is_ok() || result.is_err());
2735    }
2736
2737    #[test]
2738    fn test_escape_sequences_in_strings() {
2739        // Test various escape sequences in strings
2740        let test_cases = vec![
2741            (b"(\\n\\r\\t)".as_slice(), b"\n\r\t".as_slice()),
2742            (b"(\\\\)".as_slice(), b"\\".as_slice()),
2743            (b"(\\(\\))".as_slice(), b"()".as_slice()),
2744            (b"(\\123)".as_slice(), b"S".as_slice()), // Octal 123 = 83 = 'S'
2745            (b"(\\0)".as_slice(), b"\0".as_slice()),
2746        ];
2747
2748        for (input, expected) in test_cases {
2749            let mut content = Vec::new();
2750            content.extend_from_slice(input);
2751            content.extend_from_slice(b" Tj");
2752
2753            let result = ContentParser::parse_content(&content);
2754            assert!(result.is_ok());
2755
2756            let operations = result.unwrap();
2757            if let ContentOperation::ShowText(text) = &operations[0] {
2758                assert_eq!(text, expected, "Failed for input: {:?}", input);
2759            } else {
2760                panic!("Expected ShowText operation");
2761            }
2762        }
2763    }
2764
2765    #[test]
2766    fn test_content_with_inline_images() {
2767        // Test handling of inline images in content stream
2768        let content = b"BI /W 10 /H 10 /CS /RGB ID \x00\x01\x02\x03 EI";
2769        let result = ContentParser::parse_content(content);
2770
2771        // Should handle inline images (even if not fully implemented)
2772        assert!(result.is_ok() || result.is_err());
2773    }
2774
2775    #[test]
2776    fn test_operator_with_missing_operands() {
2777        // Test operators with insufficient operands
2778        let test_cases = vec![
2779            b"Tj" as &[u8], // ShowText without string
2780            b"m",           // MoveTo without coordinates
2781            b"rg",          // SetRGBColor without values
2782            b"Tf",          // SetFont without name and size
2783        ];
2784
2785        for content in test_cases {
2786            let result = ContentParser::parse_content(content);
2787            // Should handle gracefully (error or skip)
2788            assert!(result.is_ok() || result.is_err());
2789        }
2790    }
2791}