use super::{ParseError, ParseResult};
use crate::objects::Object;
use std::collections::HashMap;
/// One decoded operation from a PDF content stream.
///
/// Variants map one-to-one onto the content-stream operators of the PDF
/// specification (ISO 32000-1). Tuple fields are stored in the order the
/// operands appear in the stream, e.g. `a b c d e f Tm` becomes
/// `SetTextMatrix(a, b, c, d, e, f)`.
#[derive(Debug, Clone, PartialEq)]
pub enum ContentOperation {
    // Text object operators (BT / ET).
    BeginText,
    EndText,
    // Text state operators (Tc, Tw, Tz, TL, Tf, Tr, Ts).
    SetCharSpacing(f32),
    SetWordSpacing(f32),
    SetHorizontalScaling(f32),
    SetLeading(f32),
    /// `Tf`: font resource name and size.
    SetFont(String, f32),
    SetTextRenderMode(i32),
    SetTextRise(f32),
    // Text positioning operators (Td, TD, Tm, T*).
    MoveText(f32, f32),
    MoveTextSetLeading(f32, f32),
    SetTextMatrix(f32, f32, f32, f32, f32, f32),
    NextLine,
    // Text showing operators (Tj, TJ, ', ").
    ShowText(Vec<u8>),
    /// `TJ`: alternating strings and numeric adjustments.
    ShowTextArray(Vec<TextElement>),
    NextLineShowText(Vec<u8>),
    /// `"`: word spacing, char spacing, then the string to show.
    SetSpacingNextLineShowText(f32, f32, Vec<u8>),
    // Graphics state operators (q, Q, cm, w, J, j, M, d, ri, i, gs).
    SaveGraphicsState,
    RestoreGraphicsState,
    SetTransformMatrix(f32, f32, f32, f32, f32, f32),
    SetLineWidth(f32),
    SetLineCap(i32),
    SetLineJoin(i32),
    SetMiterLimit(f32),
    /// `d`: dash array and phase.
    SetDashPattern(Vec<f32>, f32),
    SetIntent(String),
    SetFlatness(f32),
    /// `gs`: name of a graphics-state parameter resource.
    SetGraphicsStateParams(String),
    // Path construction operators (m, l, c, v, y, h, re).
    MoveTo(f32, f32),
    LineTo(f32, f32),
    CurveTo(f32, f32, f32, f32, f32, f32),
    /// `v`: curve variant taking only (x2, y2, x3, y3).
    CurveToV(f32, f32, f32, f32),
    /// `y`: curve variant taking only (x1, y1, x3, y3).
    CurveToY(f32, f32, f32, f32),
    ClosePath,
    /// `re`: x, y, width, height.
    Rectangle(f32, f32, f32, f32),
    // Path painting operators (S, s, f/F, f*, B, B*, b, b*, n, W, W*).
    Stroke,
    CloseStroke,
    Fill,
    FillEvenOdd,
    FillStroke,
    FillStrokeEvenOdd,
    CloseFillStroke,
    CloseFillStrokeEvenOdd,
    EndPath,
    Clip,
    ClipEvenOdd,
    // Color operators (CS, cs, SC/SCN, sc/scn, G, g, RG, rg, K, k, sh).
    SetStrokingColorSpace(String),
    SetNonStrokingColorSpace(String),
    SetStrokingColor(Vec<f32>),
    SetNonStrokingColor(Vec<f32>),
    SetStrokingGray(f32),
    SetNonStrokingGray(f32),
    SetStrokingRGB(f32, f32, f32),
    SetNonStrokingRGB(f32, f32, f32),
    SetStrokingCMYK(f32, f32, f32, f32),
    SetNonStrokingCMYK(f32, f32, f32, f32),
    ShadingFill(String),
    // Inline image operators (BI ... ID ... EI).
    BeginInlineImage,
    /// Fully parsed inline image: expanded parameter dictionary plus the raw
    /// (still-encoded) bytes found between `ID` and `EI`.
    InlineImage {
        params: HashMap<String, Object>,
        data: Vec<u8>,
    },
    /// `Do`: paint the named XObject resource.
    PaintXObject(String),
    // Marked-content operators (BMC, BDC, EMC, MP, DP).
    BeginMarkedContent(String),
    BeginMarkedContentWithProps(String, HashMap<String, String>),
    EndMarkedContent,
    DefineMarkedContentPoint(String),
    DefineMarkedContentPointWithProps(String, HashMap<String, String>),
    // Compatibility section operators (BX / EX).
    BeginCompatibility,
    EndCompatibility,
}
/// One element of a `TJ` (show text with individual positioning) array.
#[derive(Debug, Clone, PartialEq)]
pub enum TextElement {
    /// String bytes to show.
    Text(Vec<u8>),
    /// Numeric positioning adjustment taken from the `TJ` array.
    Spacing(f32),
}
/// Lexical token produced by [`ContentTokenizer`].
#[derive(Debug, Clone, PartialEq)]
pub(super) enum Token {
    /// Real number (source contained a decimal point).
    Number(f32),
    /// Integer number (no decimal point).
    Integer(i32),
    /// Literal string `(...)` with escape sequences already resolved.
    String(Vec<u8>),
    /// Hex string `<...>` with hex digits already decoded to bytes.
    HexString(Vec<u8>),
    /// Name object `/...` with `#xx` escapes already decoded.
    Name(String),
    /// Bare operator keyword (e.g. `BT`, `Tj`, `re`).
    Operator(String),
    ArrayStart,
    ArrayEnd,
    DictStart,
    DictEnd,
    /// Raw bytes between an inline image's `ID` and `EI` keywords.
    InlineImageData(Vec<u8>),
}
/// Streaming lexer over the decoded bytes of a PDF content stream.
pub struct ContentTokenizer<'a> {
    /// The content-stream bytes being scanned.
    input: &'a [u8],
    /// Current byte offset into `input`.
    position: usize,
    /// Set after an `ID` operator so the next call reads raw image data.
    in_inline_image: bool,
}
impl<'a> ContentTokenizer<'a> {
pub fn new(input: &'a [u8]) -> Self {
Self {
input,
position: 0,
in_inline_image: false,
}
}
pub(super) fn next_token(&mut self) -> ParseResult<Option<Token>> {
if self.in_inline_image {
self.in_inline_image = false;
return self.read_inline_image_data();
}
self.skip_whitespace();
if self.position >= self.input.len() {
return Ok(None);
}
let ch = self.input[self.position];
match ch {
b'+' | b'-' | b'.' | b'0'..=b'9' => self.read_number(),
b'(' => self.read_literal_string(),
b'<' => {
if self.peek_next() == Some(b'<') {
self.position += 2;
Ok(Some(Token::DictStart))
} else {
self.read_hex_string()
}
}
b'>' => {
if self.peek_next() == Some(b'>') {
self.position += 2;
Ok(Some(Token::DictEnd))
} else {
Err(ParseError::SyntaxError {
position: self.position,
message: "Unexpected '>'".to_string(),
})
}
}
b'[' => {
self.position += 1;
Ok(Some(Token::ArrayStart))
}
b']' => {
self.position += 1;
Ok(Some(Token::ArrayEnd))
}
b'/' => self.read_name(),
b';' | b')' | b'{' | b'}' => {
self.position += 1;
self.next_token() }
_ => {
let token = self.read_operator()?;
if let Some(Token::Operator(ref op)) = token {
if op == "ID" {
self.in_inline_image = true;
}
}
Ok(token)
}
}
}
fn skip_whitespace(&mut self) {
while self.position < self.input.len() {
match self.input[self.position] {
b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => self.position += 1,
b'%' => self.skip_comment(),
_ => break,
}
}
}
fn skip_comment(&mut self) {
while self.position < self.input.len() && self.input[self.position] != b'\n' {
self.position += 1;
}
}
fn peek_next(&self) -> Option<u8> {
if self.position + 1 < self.input.len() {
Some(self.input[self.position + 1])
} else {
None
}
}
fn read_number(&mut self) -> ParseResult<Option<Token>> {
let start = self.position;
let mut has_dot = false;
if self.position < self.input.len()
&& (self.input[self.position] == b'+' || self.input[self.position] == b'-')
{
self.position += 1;
}
while self.position < self.input.len() {
match self.input[self.position] {
b'0'..=b'9' => self.position += 1,
b'.' if !has_dot => {
has_dot = true;
self.position += 1;
}
_ => break,
}
}
let num_str = std::str::from_utf8(&self.input[start..self.position]).map_err(|_| {
ParseError::SyntaxError {
position: start,
message: "Invalid number format".to_string(),
}
})?;
if has_dot {
let value = num_str
.parse::<f32>()
.map_err(|_| ParseError::SyntaxError {
position: start,
message: "Invalid float number".to_string(),
})?;
Ok(Some(Token::Number(value)))
} else {
let value = num_str
.parse::<i32>()
.map_err(|_| ParseError::SyntaxError {
position: start,
message: "Invalid integer number".to_string(),
})?;
Ok(Some(Token::Integer(value)))
}
}
fn read_literal_string(&mut self) -> ParseResult<Option<Token>> {
self.position += 1; let mut result = Vec::new();
let mut paren_depth = 1;
let mut escape = false;
while self.position < self.input.len() && paren_depth > 0 {
let ch = self.input[self.position];
self.position += 1;
if escape {
match ch {
b'n' => result.push(b'\n'),
b'r' => result.push(b'\r'),
b't' => result.push(b'\t'),
b'b' => result.push(b'\x08'),
b'f' => result.push(b'\x0C'),
b'(' => result.push(b'('),
b')' => result.push(b')'),
b'\\' => result.push(b'\\'),
b'0'..=b'7' => {
self.position -= 1;
let octal_value = self.read_octal_escape()?;
result.push(octal_value);
}
_ => result.push(ch), }
escape = false;
} else {
match ch {
b'\\' => escape = true,
b'(' => {
paren_depth += 1;
result.push(ch);
}
b')' => {
paren_depth -= 1;
if paren_depth > 0 {
result.push(ch);
}
}
_ => result.push(ch),
}
}
}
Ok(Some(Token::String(result)))
}
fn read_octal_escape(&mut self) -> ParseResult<u8> {
let mut value = 0u16;
let mut count = 0;
while count < 3 && self.position < self.input.len() {
match self.input[self.position] {
b'0'..=b'7' => {
value = value * 8 + u16::from(self.input[self.position] - b'0');
self.position += 1;
count += 1;
}
_ => break,
}
}
Ok(value as u8)
}
fn read_hex_string(&mut self) -> ParseResult<Option<Token>> {
self.position += 1; let mut result = Vec::new();
let mut nibble = None;
while self.position < self.input.len() {
let ch = self.input[self.position];
match ch {
b'>' => {
self.position += 1;
if let Some(n) = nibble {
result.push(n << 4);
}
return Ok(Some(Token::HexString(result)));
}
b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f' => {
let digit = if ch <= b'9' {
ch - b'0'
} else if ch <= b'F' {
ch - b'A' + 10
} else {
ch - b'a' + 10
};
if let Some(n) = nibble {
result.push((n << 4) | digit);
nibble = None;
} else {
nibble = Some(digit);
}
self.position += 1;
}
b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' => {
self.position += 1;
}
_ => {
return Err(ParseError::SyntaxError {
position: self.position,
message: format!("Invalid character in hex string: {:?}", ch as char),
});
}
}
}
Err(ParseError::SyntaxError {
position: self.position,
message: "Unterminated hex string".to_string(),
})
}
fn read_name(&mut self) -> ParseResult<Option<Token>> {
self.position += 1; let start = self.position;
while self.position < self.input.len() {
let ch = self.input[self.position];
match ch {
b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
| b']' | b'{' | b'}' | b'/' | b'%' => break,
b'#' => {
self.position += 1;
if self.position + 1 < self.input.len() {
self.position += 2;
}
}
_ => self.position += 1,
}
}
let name_bytes = &self.input[start..self.position];
let name = self.decode_name(name_bytes)?;
Ok(Some(Token::Name(name)))
}
fn decode_name(&self, bytes: &[u8]) -> ParseResult<String> {
let mut result = Vec::new();
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'#' && i + 2 < bytes.len() {
let hex_str = std::str::from_utf8(&bytes[i + 1..i + 3]).map_err(|_| {
ParseError::SyntaxError {
position: self.position,
message: "Invalid hex escape in name".to_string(),
}
})?;
let value =
u8::from_str_radix(hex_str, 16).map_err(|_| ParseError::SyntaxError {
position: self.position,
message: "Invalid hex escape in name".to_string(),
})?;
result.push(value);
i += 3;
} else {
result.push(bytes[i]);
i += 1;
}
}
String::from_utf8(result).map_err(|_| ParseError::SyntaxError {
position: self.position,
message: "Invalid UTF-8 in name".to_string(),
})
}
fn read_operator(&mut self) -> ParseResult<Option<Token>> {
let start = self.position;
while self.position < self.input.len() {
let ch = self.input[self.position];
match ch {
b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'(' | b')' | b'<' | b'>' | b'['
| b']' | b'{' | b'}' | b'/' | b'%' | b';' => break,
_ => self.position += 1,
}
}
let op_bytes = &self.input[start..self.position];
let op = std::str::from_utf8(op_bytes).map_err(|_| ParseError::SyntaxError {
position: start,
message: "Invalid operator".to_string(),
})?;
Ok(Some(Token::Operator(op.to_string())))
}
fn read_inline_image_data(&mut self) -> ParseResult<Option<Token>> {
if self.position < self.input.len() {
let ch = self.input[self.position];
if ch == b' ' || ch == b'\n' || ch == b'\r' || ch == b'\t' {
self.position += 1;
if ch == b'\r'
&& self.position < self.input.len()
&& self.input[self.position] == b'\n'
{
self.position += 1;
}
}
}
let start = self.position;
while self.position + 1 < self.input.len() {
let preceded_by_whitespace = self.position == start
|| matches!(
self.input[self.position - 1],
b' ' | b'\t' | b'\r' | b'\n' | b'\x0C'
);
if preceded_by_whitespace
&& self.input[self.position] == b'E'
&& self.input[self.position + 1] == b'I'
{
let after_ei = self.position + 2;
let followed_by_boundary = after_ei >= self.input.len()
|| matches!(
self.input[after_ei],
b' ' | b'\t' | b'\r' | b'\n' | b'\x0C' | b'/' | b'<' | b'(' | b'[' | b'%'
);
if followed_by_boundary {
let mut end = self.position;
if end > start
&& matches!(self.input[end - 1], b' ' | b'\t' | b'\r' | b'\n' | b'\x0C')
{
end -= 1;
}
let data = self.input[start..end].to_vec();
self.position = after_ei; return Ok(Some(Token::InlineImageData(data)));
}
}
self.position += 1;
}
let data = self.input[start..].to_vec();
self.position = self.input.len();
Ok(Some(Token::InlineImageData(data)))
}
}
/// Parses a tokenized content stream into [`ContentOperation`]s.
pub struct ContentParser {
    /// Full token stream produced by [`ContentTokenizer`].
    tokens: Vec<Token>,
    /// Index of the next token to consume.
    position: usize,
}
impl ContentParser {
    /// Creates an empty parser.
    ///
    /// The `content` argument is currently ignored; actual parsing goes
    /// through the associated functions [`ContentParser::parse`] /
    /// [`ContentParser::parse_content`].
    pub fn new(_content: &[u8]) -> Self {
        Self {
            tokens: Vec::new(),
            position: 0,
        }
    }

    /// Convenience alias for [`ContentParser::parse_content`].
    pub fn parse(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
        Self::parse_content(content)
    }

    /// Tokenizes `content` and converts the whole token stream into a list
    /// of [`ContentOperation`]s, failing on the first error.
    pub fn parse_content(content: &[u8]) -> ParseResult<Vec<ContentOperation>> {
        let mut tokenizer = ContentTokenizer::new(content);
        let mut tokens = Vec::new();
        while let Some(token) = tokenizer.next_token()? {
            tokens.push(token);
        }
        let mut parser = Self {
            tokens,
            position: 0,
        };
        parser.parse_operators()
    }

    /// Walks the token stream, accumulating operand tokens on a stack until
    /// an operator token appears, then reduces operator + operands into one
    /// [`ContentOperation`].
    fn parse_operators(&mut self) -> ParseResult<Vec<ContentOperation>> {
        let mut operators = Vec::new();
        let mut operand_stack: Vec<Token> = Vec::new();
        while self.position < self.tokens.len() {
            let token = self.tokens[self.position].clone();
            self.position += 1;
            match &token {
                Token::Operator(op) => {
                    let operator = self.parse_operator(op, &mut operand_stack)?;
                    operators.push(operator);
                }
                _ => {
                    operand_stack.push(token);
                }
            }
        }
        Ok(operators)
    }

    /// Maps one operator keyword plus its operand stack to a
    /// [`ContentOperation`].
    ///
    /// Operands are popped from the end of `operands`, i.e. in reverse of
    /// their order in the stream; multi-operand arms therefore pop the last
    /// stream operand first. The stack is cleared after every operator,
    /// discarding any operands the operator did not consume.
    ///
    /// # Errors
    /// Returns a `SyntaxError` for an unknown operator or a missing /
    /// wrongly-typed operand.
    fn parse_operator(
        &mut self,
        op: &str,
        operands: &mut Vec<Token>,
    ) -> ParseResult<ContentOperation> {
        let operator = match op {
            // --- Text object and text state operators ---
            "BT" => ContentOperation::BeginText,
            "ET" => ContentOperation::EndText,
            "Tc" => {
                let spacing = self.pop_number(operands)?;
                ContentOperation::SetCharSpacing(spacing)
            }
            "Tw" => {
                let spacing = self.pop_number(operands)?;
                ContentOperation::SetWordSpacing(spacing)
            }
            "Tz" => {
                let scale = self.pop_number(operands)?;
                ContentOperation::SetHorizontalScaling(scale)
            }
            "TL" => {
                let leading = self.pop_number(operands)?;
                ContentOperation::SetLeading(leading)
            }
            "Tf" => {
                // Stream order is `/Font size Tf`, so size pops first.
                let size = self.pop_number(operands)?;
                let font = self.pop_name(operands)?;
                ContentOperation::SetFont(font, size)
            }
            "Tr" => {
                let mode = self.pop_integer(operands)?;
                ContentOperation::SetTextRenderMode(mode)
            }
            "Ts" => {
                let rise = self.pop_number(operands)?;
                ContentOperation::SetTextRise(rise)
            }
            // --- Text positioning operators ---
            "Td" => {
                let ty = self.pop_number(operands)?;
                let tx = self.pop_number(operands)?;
                ContentOperation::MoveText(tx, ty)
            }
            "TD" => {
                let ty = self.pop_number(operands)?;
                let tx = self.pop_number(operands)?;
                ContentOperation::MoveTextSetLeading(tx, ty)
            }
            "Tm" => {
                // `a b c d e f Tm` — popped in reverse, rebuilt in order.
                let f = self.pop_number(operands)?;
                let e = self.pop_number(operands)?;
                let d = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                let b = self.pop_number(operands)?;
                let a = self.pop_number(operands)?;
                ContentOperation::SetTextMatrix(a, b, c, d, e, f)
            }
            "T*" => ContentOperation::NextLine,
            // --- Text showing operators ---
            "Tj" => {
                let text = self.pop_string(operands)?;
                ContentOperation::ShowText(text)
            }
            "TJ" => {
                let array = self.pop_array(operands)?;
                let elements = self.parse_text_array(array)?;
                ContentOperation::ShowTextArray(elements)
            }
            "'" => {
                let text = self.pop_string(operands)?;
                ContentOperation::NextLineShowText(text)
            }
            "\"" => {
                // Stream order is `aw ac string "`. The local names below are
                // swapped relative to the stream (the first pop after the
                // string yields ac, the second yields aw), but the value
                // constructed receives the operands back in stream order.
                let text = self.pop_string(operands)?;
                let aw = self.pop_number(operands)?;
                let ac = self.pop_number(operands)?;
                ContentOperation::SetSpacingNextLineShowText(ac, aw, text)
            }
            // --- Graphics state operators ---
            "q" => ContentOperation::SaveGraphicsState,
            "Q" => ContentOperation::RestoreGraphicsState,
            "cm" => {
                // `a b c d e f cm` — popped in reverse, rebuilt in order.
                let f = self.pop_number(operands)?;
                let e = self.pop_number(operands)?;
                let d = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                let b = self.pop_number(operands)?;
                let a = self.pop_number(operands)?;
                ContentOperation::SetTransformMatrix(a, b, c, d, e, f)
            }
            "w" => {
                let width = self.pop_number(operands)?;
                ContentOperation::SetLineWidth(width)
            }
            "J" => {
                let cap = self.pop_integer(operands)?;
                ContentOperation::SetLineCap(cap)
            }
            "j" => {
                let join = self.pop_integer(operands)?;
                ContentOperation::SetLineJoin(join)
            }
            "M" => {
                let limit = self.pop_number(operands)?;
                ContentOperation::SetMiterLimit(limit)
            }
            "d" => {
                // `[dashes] phase d` — phase pops first, then the array.
                let phase = self.pop_number(operands)?;
                let array = self.pop_array(operands)?;
                let pattern = self.parse_dash_array(array)?;
                ContentOperation::SetDashPattern(pattern, phase)
            }
            "ri" => {
                let intent = self.pop_name(operands)?;
                ContentOperation::SetIntent(intent)
            }
            "i" => {
                let flatness = self.pop_number(operands)?;
                ContentOperation::SetFlatness(flatness)
            }
            "gs" => {
                let name = self.pop_name(operands)?;
                ContentOperation::SetGraphicsStateParams(name)
            }
            // --- Path construction operators ---
            "m" => {
                let y = self.pop_number(operands)?;
                let x = self.pop_number(operands)?;
                ContentOperation::MoveTo(x, y)
            }
            "l" => {
                let y = self.pop_number(operands)?;
                let x = self.pop_number(operands)?;
                ContentOperation::LineTo(x, y)
            }
            "c" => {
                let y3 = self.pop_number(operands)?;
                let x3 = self.pop_number(operands)?;
                let y2 = self.pop_number(operands)?;
                let x2 = self.pop_number(operands)?;
                let y1 = self.pop_number(operands)?;
                let x1 = self.pop_number(operands)?;
                ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3)
            }
            "v" => {
                let y3 = self.pop_number(operands)?;
                let x3 = self.pop_number(operands)?;
                let y2 = self.pop_number(operands)?;
                let x2 = self.pop_number(operands)?;
                ContentOperation::CurveToV(x2, y2, x3, y3)
            }
            "y" => {
                let y3 = self.pop_number(operands)?;
                let x3 = self.pop_number(operands)?;
                let y1 = self.pop_number(operands)?;
                let x1 = self.pop_number(operands)?;
                ContentOperation::CurveToY(x1, y1, x3, y3)
            }
            "h" => ContentOperation::ClosePath,
            "re" => {
                let height = self.pop_number(operands)?;
                let width = self.pop_number(operands)?;
                let y = self.pop_number(operands)?;
                let x = self.pop_number(operands)?;
                ContentOperation::Rectangle(x, y, width, height)
            }
            // --- Path painting operators ---
            "S" => ContentOperation::Stroke,
            "s" => ContentOperation::CloseStroke,
            "f" | "F" => ContentOperation::Fill,
            "f*" => ContentOperation::FillEvenOdd,
            "B" => ContentOperation::FillStroke,
            "B*" => ContentOperation::FillStrokeEvenOdd,
            "b" => ContentOperation::CloseFillStroke,
            "b*" => ContentOperation::CloseFillStrokeEvenOdd,
            "n" => ContentOperation::EndPath,
            "W" => ContentOperation::Clip,
            "W*" => ContentOperation::ClipEvenOdd,
            // --- Color operators ---
            "CS" => {
                let name = self.pop_name(operands)?;
                ContentOperation::SetStrokingColorSpace(name)
            }
            "cs" => {
                let name = self.pop_name(operands)?;
                ContentOperation::SetNonStrokingColorSpace(name)
            }
            "SC" | "SCN" => {
                let components = self.pop_color_components(operands)?;
                ContentOperation::SetStrokingColor(components)
            }
            "sc" | "scn" => {
                let components = self.pop_color_components(operands)?;
                ContentOperation::SetNonStrokingColor(components)
            }
            "G" => {
                let gray = self.pop_number(operands)?;
                ContentOperation::SetStrokingGray(gray)
            }
            "g" => {
                let gray = self.pop_number(operands)?;
                ContentOperation::SetNonStrokingGray(gray)
            }
            "RG" => {
                let b = self.pop_number(operands)?;
                let g = self.pop_number(operands)?;
                let r = self.pop_number(operands)?;
                ContentOperation::SetStrokingRGB(r, g, b)
            }
            "rg" => {
                let b = self.pop_number(operands)?;
                let g = self.pop_number(operands)?;
                let r = self.pop_number(operands)?;
                ContentOperation::SetNonStrokingRGB(r, g, b)
            }
            "K" => {
                let k = self.pop_number(operands)?;
                let y = self.pop_number(operands)?;
                let m = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                ContentOperation::SetStrokingCMYK(c, m, y, k)
            }
            "k" => {
                let k = self.pop_number(operands)?;
                let y = self.pop_number(operands)?;
                let m = self.pop_number(operands)?;
                let c = self.pop_number(operands)?;
                ContentOperation::SetNonStrokingCMYK(c, m, y, k)
            }
            "sh" => {
                let name = self.pop_name(operands)?;
                ContentOperation::ShadingFill(name)
            }
            // --- XObject and marked-content operators ---
            "Do" => {
                let name = self.pop_name(operands)?;
                ContentOperation::PaintXObject(name)
            }
            "BMC" => {
                let tag = self.pop_name(operands)?;
                ContentOperation::BeginMarkedContent(tag)
            }
            "BDC" => {
                let props = self.pop_dict_or_name(operands)?;
                let tag = self.pop_name(operands)?;
                ContentOperation::BeginMarkedContentWithProps(tag, props)
            }
            "EMC" => ContentOperation::EndMarkedContent,
            "MP" => {
                let tag = self.pop_name(operands)?;
                ContentOperation::DefineMarkedContentPoint(tag)
            }
            "DP" => {
                let props = self.pop_dict_or_name(operands)?;
                let tag = self.pop_name(operands)?;
                ContentOperation::DefineMarkedContentPointWithProps(tag, props)
            }
            "BX" => ContentOperation::BeginCompatibility,
            "EX" => ContentOperation::EndCompatibility,
            // --- Inline images ---
            "BI" => {
                // Inline image parameters follow as tokens; nothing on the
                // operand stack belongs to BI itself.
                operands.clear();
                self.parse_inline_image()?
            }
            _ => {
                return Err(ParseError::SyntaxError {
                    position: self.position,
                    message: format!("Unknown operator: {op}"),
                });
            }
        };
        // Discard any operands the operator did not consume.
        operands.clear();
        Ok(operator)
    }

    /// Pops a numeric operand, accepting either a real or an integer token.
    fn pop_number(&self, operands: &mut Vec<Token>) -> ParseResult<f32> {
        match operands.pop() {
            Some(Token::Number(n)) => Ok(n),
            Some(Token::Integer(i)) => Ok(i as f32),
            _ => Err(ParseError::SyntaxError {
                position: self.position,
                message: "Expected number operand".to_string(),
            }),
        }
    }

    /// Pops an integer operand (real-number tokens are rejected).
    fn pop_integer(&self, operands: &mut Vec<Token>) -> ParseResult<i32> {
        match operands.pop() {
            Some(Token::Integer(i)) => Ok(i),
            _ => Err(ParseError::SyntaxError {
                position: self.position,
                message: "Expected integer operand".to_string(),
            }),
        }
    }

    /// Pops a name operand.
    fn pop_name(&self, operands: &mut Vec<Token>) -> ParseResult<String> {
        match operands.pop() {
            Some(Token::Name(n)) => Ok(n),
            _ => Err(ParseError::SyntaxError {
                position: self.position,
                message: "Expected name operand".to_string(),
            }),
        }
    }

    /// Pops a string operand, accepting literal or hex string tokens.
    fn pop_string(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<u8>> {
        match operands.pop() {
            Some(Token::String(s)) => Ok(s),
            Some(Token::HexString(s)) => Ok(s),
            _ => Err(ParseError::SyntaxError {
                position: self.position,
                message: "Expected string operand".to_string(),
            }),
        }
    }

    /// Pops the flattened tokens of one array (`ArrayStart .. ArrayEnd`)
    /// from the operand stack and returns its elements in stream order.
    ///
    /// Extra `ArrayEnd` markers encountered while unwinding are skipped
    /// leniently. Fails if no matching `ArrayStart` is found.
    fn pop_array(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<Token>> {
        // Drop the trailing ArrayEnd if it is sitting on top of the stack.
        let has_array_end = matches!(operands.last(), Some(Token::ArrayEnd));
        if has_array_end {
            operands.pop();
        }
        let mut array = Vec::new();
        let mut found_start = false;
        while let Some(token) = operands.pop() {
            match token {
                Token::ArrayStart => {
                    found_start = true;
                    break;
                }
                Token::ArrayEnd => {
                    // Stray end marker: tolerated and ignored.
                    continue;
                }
                _ => array.push(token),
            }
        }
        if !found_start {
            return Err(ParseError::SyntaxError {
                position: self.position,
                message: "Expected array".to_string(),
            });
        }
        // Elements were popped in reverse; restore stream order.
        array.reverse();
        Ok(array)
    }

    /// Pops the properties operand of a `BDC`/`DP` operator.
    ///
    /// The operand is either a name (a reference to a resource entry, stored
    /// under the synthetic key `"__resource_ref"`) or an inline dictionary.
    /// For a dictionary, the flattened `DictStart .. DictEnd` token run is
    /// unwound in reverse: each value token is rendered to a string (nested
    /// arrays become `"[a, b]"`), then the preceding name token is taken as
    /// its key. Tokens that cannot serve as values are skipped. Any other
    /// operand kind yields an empty map.
    fn pop_dict_or_name(&self, operands: &mut Vec<Token>) -> ParseResult<HashMap<String, String>> {
        if let Some(token) = operands.pop() {
            match token {
                Token::Name(name) => {
                    let mut props = HashMap::new();
                    props.insert("__resource_ref".to_string(), name);
                    Ok(props)
                }
                Token::DictEnd => {
                    let mut props = HashMap::new();
                    while let Some(value_token) = operands.pop() {
                        if matches!(value_token, Token::DictStart) {
                            break;
                        }
                        let value = match &value_token {
                            Token::Name(name) => name.clone(),
                            Token::String(s) => String::from_utf8_lossy(s).to_string(),
                            Token::Integer(i) => i.to_string(),
                            Token::Number(f) => f.to_string(),
                            Token::ArrayEnd => {
                                // Collect the nested array (popped in
                                // reverse) and render it as "[a, b, ...]".
                                let mut array_elements = Vec::new();
                                while let Some(arr_token) = operands.pop() {
                                    match arr_token {
                                        Token::ArrayStart => break,
                                        Token::Name(n) => array_elements.push(n),
                                        Token::String(s) => array_elements
                                            .push(String::from_utf8_lossy(&s).to_string()),
                                        Token::Integer(i) => array_elements.push(i.to_string()),
                                        Token::Number(f) => array_elements.push(f.to_string()),
                                        _ => {} // unsupported element: dropped
                                    }
                                }
                                array_elements.reverse();
                                format!("[{}]", array_elements.join(", "))
                            }
                            // Not representable as a value: skip this token.
                            _ => continue,
                        };
                        if let Some(Token::Name(key)) = operands.pop() {
                            props.insert(key, value);
                        }
                    }
                    Ok(props)
                }
                _ => {
                    // Unexpected operand kind: tolerated as an empty map.
                    Ok(HashMap::new())
                }
            }
        } else {
            Err(ParseError::SyntaxError {
                position: 0,
                message: "Expected dictionary or name for marked content properties".to_string(),
            })
        }
    }

    /// Pops every numeric token off the top of the stack (for `SC`/`SCN` and
    /// friends, whose component count varies by color space) and returns the
    /// components in stream order.
    fn pop_color_components(&self, operands: &mut Vec<Token>) -> ParseResult<Vec<f32>> {
        let mut components = Vec::new();
        while let Some(token) = operands.last() {
            match token {
                Token::Number(n) => {
                    components.push(*n);
                    operands.pop();
                }
                Token::Integer(i) => {
                    components.push(*i as f32);
                    operands.pop();
                }
                _ => break,
            }
        }
        // Popped in reverse; restore stream order.
        components.reverse();
        Ok(components)
    }

    /// Converts the tokens of a `TJ` array into [`TextElement`]s.
    ///
    /// # Errors
    /// Fails on any token that is not a string or a number.
    fn parse_text_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<TextElement>> {
        let mut elements = Vec::new();
        for token in tokens {
            match token {
                Token::String(s) | Token::HexString(s) => {
                    elements.push(TextElement::Text(s));
                }
                Token::Number(n) => {
                    elements.push(TextElement::Spacing(n));
                }
                Token::Integer(i) => {
                    elements.push(TextElement::Spacing(i as f32));
                }
                _ => {
                    return Err(ParseError::SyntaxError {
                        position: self.position,
                        message: "Invalid element in text array".to_string(),
                    });
                }
            }
        }
        Ok(elements)
    }

    /// Converts the tokens of a `d` dash array into numbers.
    ///
    /// # Errors
    /// Fails on any non-numeric token.
    fn parse_dash_array(&self, tokens: Vec<Token>) -> ParseResult<Vec<f32>> {
        let mut pattern = Vec::new();
        for token in tokens {
            match token {
                Token::Number(n) => pattern.push(n),
                Token::Integer(i) => pattern.push(i as f32),
                _ => {
                    return Err(ParseError::SyntaxError {
                        position: self.position,
                        message: "Invalid element in dash array".to_string(),
                    });
                }
            }
        }
        Ok(pattern)
    }

    /// Parses the parameters and data of an inline image after `BI`.
    ///
    /// Reads `/Key value` token pairs until the `ID` operator, expanding
    /// abbreviated keys and name values to their canonical forms, then takes
    /// the raw `InlineImageData` token produced by the tokenizer. If that
    /// token is absent, falls back to re-assembling the data from ordinary
    /// tokens up to `EI`.
    fn parse_inline_image(&mut self) -> ParseResult<ContentOperation> {
        let mut params = HashMap::new();
        while self.position < self.tokens.len() {
            if let Token::Operator(op) = &self.tokens[self.position] {
                if op == "ID" {
                    self.position += 1;
                    break;
                }
            }
            if let Token::Name(key) = &self.tokens[self.position] {
                self.position += 1;
                if self.position >= self.tokens.len() {
                    break;
                }
                let value = match &self.tokens[self.position] {
                    Token::Integer(n) => Object::Integer(*n as i64),
                    Token::Number(n) => Object::Real(*n as f64),
                    Token::Name(s) => Object::Name(expand_inline_name(s)),
                    Token::String(s) => Object::String(String::from_utf8_lossy(s).to_string()),
                    Token::HexString(s) => Object::String(String::from_utf8_lossy(s).to_string()),
                    _ => Object::Null, // unsupported value kind
                };
                let full_key = expand_inline_key(key);
                params.insert(full_key, value);
                self.position += 1;
            } else {
                // Unexpected token before ID: skip it leniently.
                self.position += 1;
            }
        }
        let data = if self.position < self.tokens.len() {
            if let Token::InlineImageData(bytes) = &self.tokens[self.position] {
                let d = bytes.clone();
                self.position += 1;
                d
            } else {
                self.collect_inline_image_data_from_tokens()?
            }
        } else {
            Vec::new()
        };
        Ok(ContentOperation::InlineImage { params, data })
    }

    /// Fallback for inline image data that was tokenized as ordinary tokens:
    /// concatenates token contents until an `EI` operator is reached.
    fn collect_inline_image_data_from_tokens(&mut self) -> ParseResult<Vec<u8>> {
        let mut data = Vec::new();
        while self.position < self.tokens.len() {
            if let Token::Operator(op) = &self.tokens[self.position] {
                if op == "EI" {
                    self.position += 1;
                    break;
                }
            }
            match &self.tokens[self.position] {
                Token::String(bytes) | Token::HexString(bytes) => {
                    data.extend_from_slice(bytes);
                }
                Token::Integer(n) => data.extend_from_slice(n.to_string().as_bytes()),
                Token::Number(n) => data.extend_from_slice(n.to_string().as_bytes()),
                Token::Name(s) | Token::Operator(s) => data.extend_from_slice(s.as_bytes()),
                _ => {} // structural tokens contribute no bytes
            }
            self.position += 1;
        }
        Ok(data)
    }
}
/// Expands an abbreviated inline-image dictionary key (e.g. `W`, `BPC`) to
/// its canonical full name; unrecognized keys pass through unchanged.
fn expand_inline_key(key: &str) -> String {
    let canonical = match key {
        "W" => "Width",
        "H" => "Height",
        "CS" | "ColorSpace" => "ColorSpace",
        "BPC" | "BitsPerComponent" => "BitsPerComponent",
        "F" => "Filter",
        "DP" | "DecodeParms" => "DecodeParms",
        "IM" => "ImageMask",
        "I" => "Interpolate",
        "Intent" => "Intent",
        "D" => "Decode",
        other => other,
    };
    canonical.to_string()
}
/// Expands an abbreviated inline-image name value (color space or filter
/// abbreviation, e.g. `RGB`, `AHx`) to its canonical full name;
/// unrecognized names pass through unchanged.
fn expand_inline_name(name: &str) -> String {
    let canonical = match name {
        "G" => "DeviceGray",
        "RGB" => "DeviceRGB",
        "CMYK" => "DeviceCMYK",
        "I" => "Indexed",
        "AHx" => "ASCIIHexDecode",
        "A85" => "ASCII85Decode",
        "LZW" => "LZWDecode",
        "Fl" => "FlateDecode",
        "RL" => "RunLengthDecode",
        "DCT" => "DCTDecode",
        "CCF" => "CCITTFaxDecode",
        other => other,
    };
    canonical.to_string()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_tokenize_numbers() {
let input = b"123 -45 3.14159 -0.5 .5";
let mut tokenizer = ContentTokenizer::new(input);
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(123)));
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(-45)));
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Number(3.14159))
);
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
assert_eq!(tokenizer.next_token().unwrap(), None);
}
#[test]
fn test_tokenize_strings() {
let input = b"(Hello World) (Hello\\nWorld) (Nested (paren))";
let mut tokenizer = ContentTokenizer::new(input);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"Hello World".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"Hello\nWorld".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"Nested (paren)".to_vec()))
);
}
#[test]
fn test_tokenize_hex_strings() {
let input = b"<48656C6C6F> <48 65 6C 6C 6F>";
let mut tokenizer = ContentTokenizer::new(input);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::HexString(b"Hello".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::HexString(b"Hello".to_vec()))
);
}
#[test]
fn test_tokenize_names() {
let input = b"/Name /Name#20with#20spaces /A#42C";
let mut tokenizer = ContentTokenizer::new(input);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Name("Name".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Name("Name with spaces".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Name("ABC".to_string()))
);
}
#[test]
fn test_tokenize_operators() {
let input = b"BT Tj ET q Q";
let mut tokenizer = ContentTokenizer::new(input);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Operator("BT".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Operator("Tj".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Operator("ET".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Operator("q".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Operator("Q".to_string()))
);
}
#[test]
fn test_parse_text_operators() {
let content = b"BT /F1 12 Tf 100 200 Td (Hello World) Tj ET";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 5);
assert_eq!(operators[0], ContentOperation::BeginText);
assert_eq!(
operators[1],
ContentOperation::SetFont("F1".to_string(), 12.0)
);
assert_eq!(operators[2], ContentOperation::MoveText(100.0, 200.0));
assert_eq!(
operators[3],
ContentOperation::ShowText(b"Hello World".to_vec())
);
assert_eq!(operators[4], ContentOperation::EndText);
}
#[test]
fn test_parse_graphics_operators() {
let content = b"q 1 0 0 1 50 50 cm 2 w 0 0 100 100 re S Q";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 6);
assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
assert_eq!(
operators[1],
ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
);
assert_eq!(operators[2], ContentOperation::SetLineWidth(2.0));
assert_eq!(
operators[3],
ContentOperation::Rectangle(0.0, 0.0, 100.0, 100.0)
);
assert_eq!(operators[4], ContentOperation::Stroke);
assert_eq!(operators[5], ContentOperation::RestoreGraphicsState);
}
#[test]
fn test_parse_color_operators() {
let content = b"0.5 g 1 0 0 rg 0 0 0 1 k";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 3);
assert_eq!(operators[0], ContentOperation::SetNonStrokingGray(0.5));
assert_eq!(
operators[1],
ContentOperation::SetNonStrokingRGB(1.0, 0.0, 0.0)
);
assert_eq!(
operators[2],
ContentOperation::SetNonStrokingCMYK(0.0, 0.0, 0.0, 1.0)
);
}
mod comprehensive_tests {
use super::*;
#[test]
fn test_all_text_operators() {
let content = b"BT 5 Tc 10 Tw 120 Tz 15 TL /F1 12 Tf 1 Tr 5 Ts 100 200 Td 50 150 TD T* (Hello) Tj ET";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators[0], ContentOperation::BeginText);
assert_eq!(operators[1], ContentOperation::SetCharSpacing(5.0));
assert_eq!(operators[2], ContentOperation::SetWordSpacing(10.0));
assert_eq!(operators[3], ContentOperation::SetHorizontalScaling(120.0));
assert_eq!(operators[4], ContentOperation::SetLeading(15.0));
assert_eq!(
operators[5],
ContentOperation::SetFont("F1".to_string(), 12.0)
);
assert_eq!(operators[6], ContentOperation::SetTextRenderMode(1));
assert_eq!(operators[7], ContentOperation::SetTextRise(5.0));
assert_eq!(operators[8], ContentOperation::MoveText(100.0, 200.0));
assert_eq!(
operators[9],
ContentOperation::MoveTextSetLeading(50.0, 150.0)
);
assert_eq!(operators[10], ContentOperation::NextLine);
assert_eq!(operators[11], ContentOperation::ShowText(b"Hello".to_vec()));
assert_eq!(operators[12], ContentOperation::EndText);
}
// Full sweep of graphics-state operators (q/Q/cm/w/J/j/M/gs/i/ri) in one
// stream; each decoded operation is checked in order.
#[test]
fn test_all_graphics_state_operators() {
let content = b"q Q 1 0 0 1 50 50 cm 2 w 1 J 2 j 10 M /GS1 gs 0.5 i /Perceptual ri";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
assert_eq!(operators[1], ContentOperation::RestoreGraphicsState);
assert_eq!(
operators[2],
ContentOperation::SetTransformMatrix(1.0, 0.0, 0.0, 1.0, 50.0, 50.0)
);
assert_eq!(operators[3], ContentOperation::SetLineWidth(2.0));
assert_eq!(operators[4], ContentOperation::SetLineCap(1));
assert_eq!(operators[5], ContentOperation::SetLineJoin(2));
assert_eq!(operators[6], ContentOperation::SetMiterLimit(10.0));
assert_eq!(
operators[7],
ContentOperation::SetGraphicsStateParams("GS1".to_string())
);
assert_eq!(operators[8], ContentOperation::SetFlatness(0.5));
assert_eq!(
operators[9],
ContentOperation::SetIntent("Perceptual".to_string())
);
}
// Path construction operators: m/l/c/v/y/h/re each map to their own
// ContentOperation variant with operands consumed in stream order.
#[test]
fn test_all_path_construction_operators() {
let content = b"100 200 m 150 200 l 200 200 250 250 300 200 c 250 180 300 200 v 200 180 300 200 y h 50 50 100 100 re";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
assert_eq!(
operators[2],
ContentOperation::CurveTo(200.0, 200.0, 250.0, 250.0, 300.0, 200.0)
);
assert_eq!(
operators[3],
ContentOperation::CurveToV(250.0, 180.0, 300.0, 200.0)
);
assert_eq!(
operators[4],
ContentOperation::CurveToY(200.0, 180.0, 300.0, 200.0)
);
assert_eq!(operators[5], ContentOperation::ClosePath);
assert_eq!(
operators[6],
ContentOperation::Rectangle(50.0, 50.0, 100.0, 100.0)
);
}
// Path painting and clipping operators. Note that `f` and `F` are synonyms
// for Fill (indices 2 and 3 below).
#[test]
fn test_all_path_painting_operators() {
let content = b"S s f F f* B B* b b* n W W*";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators[0], ContentOperation::Stroke);
assert_eq!(operators[1], ContentOperation::CloseStroke);
assert_eq!(operators[2], ContentOperation::Fill);
assert_eq!(operators[3], ContentOperation::Fill); assert_eq!(operators[4], ContentOperation::FillEvenOdd);
assert_eq!(operators[5], ContentOperation::FillStroke);
assert_eq!(operators[6], ContentOperation::FillStrokeEvenOdd);
assert_eq!(operators[7], ContentOperation::CloseFillStroke);
assert_eq!(operators[8], ContentOperation::CloseFillStrokeEvenOdd);
assert_eq!(operators[9], ContentOperation::EndPath);
assert_eq!(operators[10], ContentOperation::Clip);
assert_eq!(operators[11], ContentOperation::ClipEvenOdd);
}
// Color operators: color-space selection (CS/cs), gray (G/g), RGB (RG/rg),
// CMYK (K/k) and shading fill (sh), for both stroking and non-stroking.
#[test]
fn test_all_color_operators() {
let content = b"/DeviceRGB CS /DeviceGray cs 0.7 G 0.4 g 1 0 0 RG 0 1 0 rg 0 0 0 1 K 0.2 0.3 0.4 0.5 k /Shade1 sh";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(
operators[0],
ContentOperation::SetStrokingColorSpace("DeviceRGB".to_string())
);
assert_eq!(
operators[1],
ContentOperation::SetNonStrokingColorSpace("DeviceGray".to_string())
);
assert_eq!(operators[2], ContentOperation::SetStrokingGray(0.7));
assert_eq!(operators[3], ContentOperation::SetNonStrokingGray(0.4));
assert_eq!(
operators[4],
ContentOperation::SetStrokingRGB(1.0, 0.0, 0.0)
);
assert_eq!(
operators[5],
ContentOperation::SetNonStrokingRGB(0.0, 1.0, 0.0)
);
assert_eq!(
operators[6],
ContentOperation::SetStrokingCMYK(0.0, 0.0, 0.0, 1.0)
);
assert_eq!(
operators[7],
ContentOperation::SetNonStrokingCMYK(0.2, 0.3, 0.4, 0.5)
);
assert_eq!(
operators[8],
ContentOperation::ShadingFill("Shade1".to_string())
);
}
// XObject painting (Do), marked-content (BMC/EMC/MP) and compatibility
// section (BX/EX) operators.
#[test]
fn test_xobject_and_marked_content_operators() {
let content = b"/Image1 Do /MC1 BMC EMC /MP1 MP BX EX";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(
operators[0],
ContentOperation::PaintXObject("Image1".to_string())
);
assert_eq!(
operators[1],
ContentOperation::BeginMarkedContent("MC1".to_string())
);
assert_eq!(operators[2], ContentOperation::EndMarkedContent);
assert_eq!(
operators[3],
ContentOperation::DefineMarkedContentPoint("MP1".to_string())
);
assert_eq!(operators[4], ContentOperation::BeginCompatibility);
assert_eq!(operators[5], ContentOperation::EndCompatibility);
}
// A realistic mixed stream: graphics-state save/restore wrapping a text
// object, decoded into exactly eight operations.
#[test]
fn test_complex_content_stream() {
let content = b"q 0.5 0 0 0.5 100 100 cm BT /F1 12 Tf 0 0 Td (Complex) Tj ET Q";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 8);
assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
assert_eq!(
operators[1],
ContentOperation::SetTransformMatrix(0.5, 0.0, 0.0, 0.5, 100.0, 100.0)
);
assert_eq!(operators[2], ContentOperation::BeginText);
assert_eq!(
operators[3],
ContentOperation::SetFont("F1".to_string(), 12.0)
);
assert_eq!(operators[4], ContentOperation::MoveText(0.0, 0.0));
assert_eq!(
operators[5],
ContentOperation::ShowText(b"Complex".to_vec())
);
assert_eq!(operators[6], ContentOperation::EndText);
assert_eq!(operators[7], ContentOperation::RestoreGraphicsState);
}
// Arbitrary runs of spaces/tabs/newlines/CRs between tokens must be skipped,
// and trailing whitespace must yield a clean end-of-input (None).
#[test]
fn test_tokenizer_whitespace_handling() {
let input = b" \t\n\r BT \t\n /F1 12.5 \t Tf \n\r ET ";
let mut tokenizer = ContentTokenizer::new(input);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Operator("BT".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Name("F1".to_string()))
);
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(12.5)));
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Operator("Tf".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Operator("ET".to_string()))
);
assert_eq!(tokenizer.next_token().unwrap(), None);
}
// Numeric literal forms: bare integers stay Integer; anything with a decimal
// point (leading/trailing dot, explicit sign) becomes Number.
#[test]
fn test_tokenizer_edge_cases() {
let input = b"0 .5 -.5 +.5 123. .123 1.23 -1.23";
let mut tokenizer = ContentTokenizer::new(input);
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Integer(0)));
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-0.5)));
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.5)));
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(123.0)));
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(0.123)));
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(1.23)));
assert_eq!(tokenizer.next_token().unwrap(), Some(Token::Number(-1.23)));
}
// Literal-string escape handling: \\, \), \n, \t, \r, \b, \f, \( and \).
// NOTE(review): the input also contains two trailing octal-escape strings
// (\377 and \ddd) that this test never consumes — confirm whether they were
// meant to be asserted as well.
#[test]
fn test_string_parsing_edge_cases() {
let input = b"(Simple) (With\\\\backslash) (With\\)paren) (With\\newline) (With\\ttab) (With\\rcarriage) (With\\bbackspace) (With\\fformfeed) (With\\(leftparen) (With\\)rightparen) (With\\377octal) (With\\dddoctal)";
let mut tokenizer = ContentTokenizer::new(input);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"Simple".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"With\\backslash".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"With)paren".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"With\newline".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"With\ttab".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"With\rcarriage".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"With\x08backspace".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"With\x0Cformfeed".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"With(leftparen".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::String(b"With)rightparen".to_vec()))
);
}
// Hex strings: embedded whitespace is ignored, and an odd trailing digit is
// padded with 0 (so <...5> decodes as 0x50).
#[test]
fn test_hex_string_parsing() {
let input = b"<48656C6C6F> <48 65 6C 6C 6F> <48656C6C6F57> <48656C6C6F5>";
let mut tokenizer = ContentTokenizer::new(input);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::HexString(b"Hello".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::HexString(b"Hello".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::HexString(b"HelloW".to_vec()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::HexString(b"Hello\x50".to_vec()))
);
}
// Name tokens: #XX hex escapes decode to their byte value, including space
// (#20), '#' itself (#23) and '/' (#2F).
#[test]
fn test_name_parsing_edge_cases() {
let input = b"/Name /Name#20with#20spaces /Name#23with#23hash /Name#2Fwith#2Fslash /#45mptyName";
let mut tokenizer = ContentTokenizer::new(input);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Name("Name".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Name("Name with spaces".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Name("Name#with#hash".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Name("Name/with/slash".to_string()))
);
assert_eq!(
tokenizer.next_token().unwrap(),
Some(Token::Name("EmptyName".to_string()))
);
}
// Repeated bare operators must each decode to their own operation, in stream
// order, with no coalescing of adjacent duplicates.
#[test]
fn test_operator_parsing_edge_cases() {
    let operators = ContentParser::parse(b"q q q Q Q Q BT BT ET ET").unwrap();
    let expected = [
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::SaveGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::RestoreGraphicsState,
        ContentOperation::BeginText,
        ContentOperation::BeginText,
        ContentOperation::EndText,
        ContentOperation::EndText,
    ];
    assert_eq!(operators.len(), expected.len());
    for (got, want) in operators.iter().zip(expected.iter()) {
        assert_eq!(got, want);
    }
}
// Td takes two operands; supplying only one must be a parse error.
#[test]
fn test_error_handling_insufficient_operands() {
let content = b"100 Td"; let result = ContentParser::parse(content);
assert!(result.is_err());
}
// An unrecognized operator token must be rejected.
#[test]
fn test_error_handling_invalid_operator() {
let content = b"100 200 INVALID";
let result = ContentParser::parse(content);
assert!(result.is_err());
}
// An unterminated literal string may be recovered from (Ok) or reported as a
// parse error (Err) — both are acceptable. The original assertion
// `result.is_ok() || result.is_err()` was a tautology that tested nothing;
// the real contract is that the tokenizer terminates without panicking or
// looping forever on truncated input.
#[test]
fn test_error_handling_malformed_string() {
    let input = b"(Unclosed string";
    let mut tokenizer = ContentTokenizer::new(input);
    // A handful of calls is more than enough to drain this input; failing to
    // reach end-of-input (or an error) within the cap indicates a hang.
    for _ in 0..8 {
        match tokenizer.next_token() {
            Ok(None) | Err(_) => return,
            Ok(Some(_)) => {}
        }
    }
    panic!("tokenizer did not terminate on an unclosed string");
}
// 'G' is not a hex digit, so the hex string must be rejected.
#[test]
fn test_error_handling_malformed_hex_string() {
let input = b"<48656C6C6G>";
let mut tokenizer = ContentTokenizer::new(input);
let result = tokenizer.next_token();
assert!(result.is_err());
}
// #GG is not a valid two-digit hex escape inside a name token.
#[test]
fn test_error_handling_malformed_name() {
let input = b"/Name#GG";
let mut tokenizer = ContentTokenizer::new(input);
let result = tokenizer.next_token();
assert!(result.is_err());
}
// An empty stream parses to zero operations (not an error).
#[test]
fn test_empty_content_stream() {
let content = b"";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 0);
}
// Pure whitespace likewise yields zero operations.
#[test]
fn test_whitespace_only_content_stream() {
let content = b" \t\n\r ";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 0);
}
// Integer operands are accepted where f32 operands are expected.
#[test]
fn test_mixed_integer_and_real_operands() {
let content = b"100 200 m 150 200 l";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 2);
assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
}
// Negative integer and real operands round-trip through the parser.
#[test]
fn test_negative_operands() {
let content = b"-100 -200 Td -50.5 -75.2 TD";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 2);
assert_eq!(operators[0], ContentOperation::MoveText(-100.0, -200.0));
assert_eq!(
operators[1],
ContentOperation::MoveTextSetLeading(-50.5, -75.2)
);
}
// Large-magnitude coordinates survive parsing (both literals round to the
// same f32 on each side of the comparison, so derived PartialEq holds).
#[test]
fn test_large_numbers() {
let content = b"999999.999999 -999999.999999 m";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 1);
assert_eq!(
operators[0],
ContentOperation::MoveTo(999999.999999, -999999.999999)
);
}
// NOTE(review): despite its name, this test feeds plain decimal literals —
// there is no exponent form in the input. Presumably scientific notation is
// not supported by the tokenizer; confirm, and either rename the test or add
// actual exponent-notation inputs.
#[test]
fn test_scientific_notation() {
let content = b"123.45 -456.78 m";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 1);
assert_eq!(operators[0], ContentOperation::MoveTo(123.45, -456.78));
}
// TJ requires an array operand; a bare string before TJ must be rejected.
#[test]
fn test_show_text_array_complex() {
let content = b"(Hello) TJ";
let result = ContentParser::parse(content);
assert!(result.is_err());
}
// `d` requires an array plus a phase; a lone number must be rejected.
#[test]
fn test_dash_pattern_empty() {
let content = b"0 d";
let result = ContentParser::parse(content);
assert!(result.is_err());
}
// Same as above with a real-number operand.
#[test]
fn test_dash_pattern_complex() {
let content = b"2.5 d";
let result = ContentParser::parse(content);
assert!(result.is_err());
}
// pop_array must collect the bracketed elements, dropping the ArrayStart and
// (when present) ArrayEnd markers from the operand stack.
#[test]
fn test_pop_array_removes_array_end() {
let parser = ContentParser::new(b"");
let mut operands = vec![
Token::ArrayStart,
Token::Integer(1),
Token::Integer(2),
Token::Integer(3),
Token::ArrayEnd,
];
let result = parser.pop_array(&mut operands).unwrap();
assert_eq!(result.len(), 3);
assert!(operands.is_empty());
// Missing ArrayEnd: the remaining tokens are still consumed.
let mut operands = vec![Token::ArrayStart, Token::Number(1.5), Token::Number(2.5)];
let result = parser.pop_array(&mut operands).unwrap();
assert_eq!(result.len(), 2);
assert!(operands.is_empty());
}
// Dash arrays accept mixed Number/Integer tokens and may be empty.
#[test]
fn test_dash_array_parsing_valid() {
let parser = ContentParser::new(b"");
let valid_tokens = vec![Token::Number(3.0), Token::Integer(2)];
let result = parser.parse_dash_array(valid_tokens).unwrap();
assert_eq!(result, vec![3.0, 2.0]);
let empty_tokens = vec![];
let result = parser.parse_dash_array(empty_tokens).unwrap();
let expected: Vec<f32> = vec![];
assert_eq!(result, expected);
}
// TJ arrays interleave strings and spacing adjustments; all three elements
// must be preserved.
#[test]
fn test_text_array_parsing_valid() {
let parser = ContentParser::new(b"");
let valid_tokens = vec![
Token::String(b"Hello".to_vec()),
Token::Number(-100.0),
Token::String(b"World".to_vec()),
];
let result = parser.parse_text_array(valid_tokens).unwrap();
assert_eq!(result.len(), 3);
}
// BI...ID...EI inline image: abbreviated parameter keys (/W, /H, /BPC, /CS)
// are expanded to their full dictionary names, and /RGB to DeviceRGB.
#[test]
fn test_inline_image_handling() {
let content = b"BI /W 100 /H 100 /BPC 8 /CS /RGB ID some_image_data EI";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 1);
match &operators[0] {
ContentOperation::InlineImage { params, data: _ } => {
assert_eq!(params.get("Width"), Some(&Object::Integer(100)));
assert_eq!(params.get("Height"), Some(&Object::Integer(100)));
assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(8)));
assert_eq!(
params.get("ColorSpace"),
Some(&Object::Name("DeviceRGB".to_string()))
);
}
_ => panic!("Expected InlineImage operation"),
}
}
// Abbreviated filter (/F /AHx) and color space (/G) names are also expanded
// (ASCIIHexDecode, DeviceGray).
#[test]
fn test_inline_image_with_filter() {
let content = b"BI /W 50 /H 50 /CS /G /BPC 1 /F /AHx ID 00FF00FF EI";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 1);
match &operators[0] {
ContentOperation::InlineImage { params, data: _ } => {
assert_eq!(params.get("Width"), Some(&Object::Integer(50)));
assert_eq!(params.get("Height"), Some(&Object::Integer(50)));
assert_eq!(
params.get("ColorSpace"),
Some(&Object::Name("DeviceGray".to_string()))
);
assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(1)));
assert_eq!(
params.get("Filter"),
Some(&Object::Name("ASCIIHexDecode".to_string()))
);
}
_ => panic!("Expected InlineImage operation"),
}
}
// Smoke-level performance check: 1000 MoveTo ops parse quickly.
// NOTE(review): wall-clock assertions like this can be flaky on loaded CI
// machines; consider making the threshold generous or gating behind a flag.
#[test]
fn test_content_parser_performance() {
let mut content = Vec::new();
for i in 0..1000 {
content.extend_from_slice(format!("{} {} m ", i, i + 1).as_bytes());
}
let start = std::time::Instant::now();
let operators = ContentParser::parse(&content).unwrap();
let duration = start.elapsed();
assert_eq!(operators.len(), 1000);
assert!(duration.as_millis() < 100); }
// Tokenizer throughput: 1000 number pairs → 2000 tokens.
// NOTE(review): same wall-clock flakiness caveat as above.
#[test]
fn test_tokenizer_performance() {
let mut input = Vec::new();
for i in 0..1000 {
input.extend_from_slice(format!("{} {} ", i, i + 1).as_bytes());
}
let start = std::time::Instant::now();
let mut tokenizer = ContentTokenizer::new(&input);
let mut count = 0;
while tokenizer.next_token().unwrap().is_some() {
count += 1;
}
let duration = start.elapsed();
assert_eq!(count, 2000); assert!(duration.as_millis() < 50); }
// Builds 10_000 CurveTo operations and checks the parser handles a large
// stream, producing only CurveTo variants.
#[test]
fn test_memory_usage_large_content() {
    let mut content = Vec::new();
    for i in 0..10000 {
        content.extend_from_slice(
            format!("{} {} {} {} {} {} c ", i, i + 1, i + 2, i + 3, i + 4, i + 5)
                .as_bytes(),
        );
    }
    let operators = ContentParser::parse(&content).unwrap();
    assert_eq!(operators.len(), 10000);
    for op in operators {
        // Bug fix: the original evaluated `matches!` and discarded the
        // boolean, so the loop asserted nothing.
        assert!(matches!(op, ContentOperation::CurveTo(_, _, _, _, _, _)));
    }
}
// Parsing the same shared buffer from ten threads must be safe and yield
// identical results in each thread (the parser holds no global state).
#[test]
fn test_concurrent_parsing() {
use std::sync::Arc;
use std::thread;
let content = Arc::new(b"BT /F1 12 Tf 100 200 Td (Hello) Tj ET".to_vec());
let handles: Vec<_> = (0..10)
.map(|_| {
let content_clone = content.clone();
thread::spawn(move || ContentParser::parse(&content_clone).unwrap())
})
.collect();
for handle in handles {
let operators = handle.join().unwrap();
assert_eq!(operators.len(), 5);
assert_eq!(operators[0], ContentOperation::BeginText);
assert_eq!(operators[4], ContentOperation::EndText);
}
}
// Hex-string corner cases: empty <>, odd digit count (zero-padded), and
// interleaved whitespace between digit pairs.
#[test]
fn test_tokenizer_hex_string_edge_cases() {
let mut tokenizer = ContentTokenizer::new(b"<>");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::HexString(data) => assert!(data.is_empty()),
_ => panic!("Expected empty hex string"),
}
let mut tokenizer = ContentTokenizer::new(b"<123>");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::HexString(data) => assert_eq!(data, vec![0x12, 0x30]),
_ => panic!("Expected hex string with odd digits"),
}
let mut tokenizer = ContentTokenizer::new(b"<12 34\t56\n78>");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::HexString(data) => assert_eq!(data, vec![0x12, 0x34, 0x56, 0x78]),
_ => panic!("Expected hex string with whitespace"),
}
}
// All single-character escapes plus three-digit octal escapes decode to the
// corresponding byte values.
#[test]
fn test_tokenizer_literal_string_escape_sequences() {
let mut tokenizer = ContentTokenizer::new(b"(\\n\\r\\t\\b\\f\\(\\)\\\\)");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::String(data) => {
assert_eq!(
data,
vec![b'\n', b'\r', b'\t', 0x08, 0x0C, b'(', b')', b'\\']
);
}
_ => panic!("Expected string with escapes"),
}
let mut tokenizer = ContentTokenizer::new(b"(\\101\\040\\377)");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::String(data) => assert_eq!(data, vec![b'A', b' ', 255]),
_ => panic!("Expected string with octal escapes"),
}
}
// Balanced unescaped parentheses inside a literal string are part of the
// string content, at arbitrary nesting depth.
#[test]
fn test_tokenizer_nested_parentheses() {
let mut tokenizer = ContentTokenizer::new(b"(outer (inner) text)");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::String(data) => {
assert_eq!(data, b"outer (inner) text");
}
_ => panic!("Expected string with nested parentheses"),
}
let mut tokenizer = ContentTokenizer::new(b"(level1 (level2 (level3) back2) back1)");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::String(data) => {
assert_eq!(data, b"level1 (level2 (level3) back2) back1");
}
_ => panic!("Expected string with deep nesting"),
}
}
// Name #XX escapes for space and for the delimiter characters / ( ) < >.
#[test]
fn test_tokenizer_name_hex_escapes() {
let mut tokenizer = ContentTokenizer::new(b"/Name#20With#20Spaces");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::Name(name) => assert_eq!(name, "Name With Spaces"),
_ => panic!("Expected name with hex escapes"),
}
let mut tokenizer = ContentTokenizer::new(b"/Special#2F#28#29#3C#3E");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::Name(name) => assert_eq!(name, "Special/()<>"),
_ => panic!("Expected name with special character escapes"),
}
}
// Numeric extremes: i32::MAX, a tiny fraction, and a leading-dot literal.
#[test]
fn test_tokenizer_number_edge_cases() {
let mut tokenizer = ContentTokenizer::new(b"2147483647");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::Integer(n) => assert_eq!(n, 2147483647),
_ => panic!("Expected large integer"),
}
let mut tokenizer = ContentTokenizer::new(b"0.00001");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::Number(n) => assert!((n - 0.00001).abs() < f32::EPSILON),
_ => panic!("Expected small float"),
}
let mut tokenizer = ContentTokenizer::new(b".5");
let token = tokenizer.next_token().unwrap().unwrap();
match token {
Token::Number(n) => assert!((n - 0.5).abs() < f32::EPSILON),
_ => panic!("Expected float starting with dot"),
}
}
// A closed rectangle path followed by a fill decodes in order.
#[test]
fn test_parser_complex_path_operations() {
let content = b"100 200 m 150 200 l 150 250 l 100 250 l h f";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 6);
assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
assert_eq!(operators[1], ContentOperation::LineTo(150.0, 200.0));
assert_eq!(operators[2], ContentOperation::LineTo(150.0, 250.0));
assert_eq!(operators[3], ContentOperation::LineTo(100.0, 250.0));
assert_eq!(operators[4], ContentOperation::ClosePath);
assert_eq!(operators[5], ContentOperation::Fill);
}
// CurveTo operands must be finite; only the first control point's range is
// spot-checked here.
#[test]
fn test_parser_bezier_curves() {
let content = b"100 100 150 50 200 150 c";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 1);
match &operators[0] {
ContentOperation::CurveTo(x1, y1, x2, y2, x3, y3) => {
assert!(x1.is_finite() && y1.is_finite());
assert!(x2.is_finite() && y2.is_finite());
assert!(x3.is_finite() && y3.is_finite());
assert!(*x1 >= 50.0 && *x1 <= 200.0);
assert!(*y1 >= 50.0 && *y1 <= 200.0);
}
_ => panic!("Expected CurveTo operation"),
}
}
// Mixed color operators, including an `sc` with a three-component operand
// list; only the first two operations are checked in detail.
#[test]
fn test_parser_color_operations() {
let content = b"0.5 g 1 0 0 rg 0 1 0 1 k /DeviceRGB cs 0.2 0.4 0.6 sc";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 5);
match &operators[0] {
ContentOperation::SetNonStrokingGray(gray) => assert_eq!(*gray, 0.5),
_ => panic!("Expected SetNonStrokingGray"),
}
match &operators[1] {
ContentOperation::SetNonStrokingRGB(r, g, b) => {
assert_eq!((*r, *g, *b), (1.0, 0.0, 0.0));
}
_ => panic!("Expected SetNonStrokingRGB"),
}
}
// Text matrix (Tm), leading (TL) and next-line-show (') in one text object;
// only the bracketing ops and the matrix are checked in detail.
#[test]
fn test_parser_text_positioning_advanced() {
let content = b"BT 1 0 0 1 100 200 Tm 0 TL 10 TL (Line 1) ' (Line 2) ' ET";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 7);
assert_eq!(operators[0], ContentOperation::BeginText);
match &operators[1] {
ContentOperation::SetTextMatrix(a, b, c, d, e, f) => {
assert_eq!((*a, *b, *c, *d, *e, *f), (1.0, 0.0, 0.0, 1.0, 100.0, 200.0));
}
_ => panic!("Expected SetTextMatrix"),
}
assert_eq!(operators[6], ContentOperation::EndText);
}
// q/Q bracketing around a transform and line-style settings.
#[test]
fn test_parser_graphics_state_operations() {
let content = b"q 2 0 0 2 100 100 cm 5 w 1 J 2 j 10 M Q";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 7);
assert_eq!(operators[0], ContentOperation::SaveGraphicsState);
match &operators[1] {
ContentOperation::SetTransformMatrix(a, b, c, d, e, f) => {
assert_eq!((*a, *b, *c, *d, *e, *f), (2.0, 0.0, 0.0, 2.0, 100.0, 100.0));
}
_ => panic!("Expected SetTransformMatrix"),
}
assert_eq!(operators[6], ContentOperation::RestoreGraphicsState);
}
// Multiple Do invocations each carry their own XObject name.
#[test]
fn test_parser_xobject_operations() {
let content = b"/Image1 Do /Form2 Do /Pattern3 Do";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 3);
for (i, expected_name) in ["Image1", "Form2", "Pattern3"].iter().enumerate() {
match &operators[i] {
ContentOperation::PaintXObject(name) => assert_eq!(name, expected_name),
_ => panic!("Expected PaintXObject"),
}
}
}
// BMC/EMC around a text-show operator.
#[test]
fn test_parser_marked_content_operations() {
let content = b"/P BMC (Tagged content) Tj EMC";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 3);
match &operators[0] {
ContentOperation::BeginMarkedContent(tag) => assert_eq!(tag, "P"),
_ => panic!("Expected BeginMarkedContent"),
}
assert_eq!(operators[2], ContentOperation::EndMarkedContent);
}
// Error cases: `m` without operands, a truncated hex string before BT, and
// trailing operands without an operator (the last is tolerated as Ok).
#[test]
fn test_parser_error_handling_invalid_operators() {
let content = b"m";
let result = ContentParser::parse(content);
assert!(result.is_err());
let content = b"<ABC DEF BT";
let result = ContentParser::parse(content);
assert!(result.is_err());
let content = b"100 200 300"; let result = ContentParser::parse(content);
assert!(result.is_ok()); }
// Leading/trailing/interleaved whitespace around operands and operators.
#[test]
fn test_parser_whitespace_tolerance() {
let content = b" \n\t 100 \r\n 200 \t m \n";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 1);
assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
}
// %-comments are skipped to end of line, including a trailing comment with
// no newline.
#[test]
fn test_tokenizer_comment_handling() {
let content = b"100 % This is a comment\n200 m % Another comment";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 1);
assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
}
// A non-ASCII byte inside a comment must not derail parsing.
#[test]
fn test_parser_stream_with_binary_data() {
let content = b"100 200 m % Comment with \xFF binary\n150 250 l";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 2);
assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
assert_eq!(operators[1], ContentOperation::LineTo(150.0, 250.0));
}
// NOTE(review): despite the name, this input contains no array syntax; it
// duplicates the plain m/l case above. Consider adding a `[...] TJ` or
// `[...] 0 d` input, or renaming the test.
#[test]
fn test_tokenizer_array_parsing() {
let content = b"100 200 m 150 250 l";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 2);
assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
assert_eq!(operators[1], ContentOperation::LineTo(150.0, 250.0));
}
// `re` takes x, y, width, height in that operand order.
#[test]
fn test_parser_rectangle_operations() {
let content = b"10 20 100 50 re 0 0 200 300 re";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 2);
match &operators[0] {
ContentOperation::Rectangle(x, y, width, height) => {
assert_eq!((*x, *y, *width, *height), (10.0, 20.0, 100.0, 50.0));
}
_ => panic!("Expected Rectangle operation"),
}
match &operators[1] {
ContentOperation::Rectangle(x, y, width, height) => {
assert_eq!((*x, *y, *width, *height), (0.0, 0.0, 200.0, 300.0));
}
_ => panic!("Expected Rectangle operation"),
}
}
// W / W* clip operators followed by the mandatory path-ending `n`.
#[test]
fn test_parser_clipping_operations() {
let content = b"100 100 50 50 re W n 200 200 75 75 re W* n";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 6);
assert_eq!(operators[1], ContentOperation::Clip);
assert_eq!(operators[2], ContentOperation::EndPath);
assert_eq!(operators[4], ContentOperation::ClipEvenOdd);
assert_eq!(operators[5], ContentOperation::EndPath);
}
// All eight stroke/fill painting operators, one assertion per variant.
#[test]
fn test_parser_painting_operations() {
let content = b"S s f f* B B* b b*";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 8);
assert_eq!(operators[0], ContentOperation::Stroke);
assert_eq!(operators[1], ContentOperation::CloseStroke);
assert_eq!(operators[2], ContentOperation::Fill);
assert_eq!(operators[3], ContentOperation::FillEvenOdd);
assert_eq!(operators[4], ContentOperation::FillStroke);
assert_eq!(operators[5], ContentOperation::FillStrokeEvenOdd);
assert_eq!(operators[6], ContentOperation::CloseFillStroke);
assert_eq!(operators[7], ContentOperation::CloseFillStrokeEvenOdd);
}
// Line-style operators: width, cap, join, miter limit, and a dash pattern.
#[test]
fn test_parser_line_style_operations() {
    let content = b"5 w 1 J 2 j 10 M [ 3 2 ] 0 d";
    let operators = ContentParser::parse(content).unwrap();
    assert_eq!(operators.len(), 5);
    assert_eq!(operators[0], ContentOperation::SetLineWidth(5.0));
    assert_eq!(operators[1], ContentOperation::SetLineCap(1));
    assert_eq!(operators[2], ContentOperation::SetLineJoin(2));
    assert_eq!(operators[3], ContentOperation::SetMiterLimit(10.0));
    // The original asserted len == 5 but never inspected the fifth
    // operation; `[ 3 2 ] 0 d` must decode to dash array [3, 2], phase 0
    // (mirrors test_dash_array_parsing_valid).
    assert_eq!(
        operators[4],
        ContentOperation::SetDashPattern(vec![3.0, 2.0], 0.0)
    );
}
// Text-state operators: char spacing, word spacing, horizontal scaling,
// render mode and rise.
#[test]
fn test_parser_text_state_operations() {
let content = b"12 Tc 3 Tw 100 Tz 1 Tr 2 Ts";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 5);
assert_eq!(operators[0], ContentOperation::SetCharSpacing(12.0));
assert_eq!(operators[1], ContentOperation::SetWordSpacing(3.0));
assert_eq!(operators[2], ContentOperation::SetHorizontalScaling(100.0));
assert_eq!(operators[3], ContentOperation::SetTextRenderMode(1));
assert_eq!(operators[4], ContentOperation::SetTextRise(2.0));
}
// Multi-byte (UTF-8-looking) bytes inside a literal string are preserved as
// raw bytes in ShowText; only the length is spot-checked.
#[test]
fn test_parser_unicode_text() {
let content = b"BT (Hello \xC2\xA9 World \xE2\x9C\x93) Tj ET";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 3);
assert_eq!(operators[0], ContentOperation::BeginText);
match &operators[1] {
ContentOperation::ShowText(text) => {
assert!(text.len() > 5); }
_ => panic!("Expected ShowText operation"),
}
assert_eq!(operators[2], ContentOperation::EndText);
}
// Near-f32-precision-limit coordinates survive with loose tolerance.
#[test]
fn test_parser_stress_test_large_coordinates() {
let content = b"999999.999 -999999.999 999999.999 -999999.999 999999.999 -999999.999 c";
let operators = ContentParser::parse(content).unwrap();
assert_eq!(operators.len(), 1);
match &operators[0] {
ContentOperation::CurveTo(_x1, _y1, _x2, _y2, _x3, _y3) => {
assert!((*_x1 - 999999.999).abs() < 0.1);
assert!((*_y1 - (-999999.999)).abs() < 0.1);
assert!((*_x3 - 999999.999).abs() < 0.1);
}
_ => panic!("Expected CurveTo operation"),
}
}
// Empty and whitespace-only streams both decode to zero operations.
#[test]
fn test_parser_empty_content_stream() {
let content = b"";
let operators = ContentParser::parse(content).unwrap();
assert!(operators.is_empty());
let content = b" \n\t\r ";
let operators = ContentParser::parse(content).unwrap();
assert!(operators.is_empty());
}
// A raw 0xFF byte embedded in a %-comment must be skipped along with the
// rest of the comment. The original assertion
// `result.is_ok() || result.is_err()` was a tautology; the sibling test
// test_parser_stream_with_binary_data establishes that such input parses
// successfully, so pin that behavior here too.
#[test]
fn test_tokenizer_error_recovery() {
    let content = b"100 200 m % Comment with\xFFbinary\n150 250 l";
    let operators = ContentParser::parse(content)
        .expect("binary byte inside a comment should be ignored");
    assert_eq!(operators.len(), 2);
    assert_eq!(operators[0], ContentOperation::MoveTo(100.0, 200.0));
    assert_eq!(operators[1], ContentOperation::LineTo(150.0, 250.0));
}
// Throughput smoke test over 1000 identical-shape operations.
// NOTE(review): wall-clock assertions can be flaky on loaded CI machines.
#[test]
fn test_parser_optimization_repeated_operations() {
let mut content = Vec::new();
for i in 0..1000 {
content.extend_from_slice(format!("{} {} m ", i, i * 2).as_bytes());
}
let start = std::time::Instant::now();
let operators = ContentParser::parse(&content).unwrap();
let duration = start.elapsed();
assert_eq!(operators.len(), 1000);
assert!(duration.as_millis() < 200); }
// A 10 KB literal string must round-trip through ShowText intact.
#[test]
fn test_parser_memory_efficiency_large_strings() {
let large_text = "A".repeat(10000);
let content = format!("BT ({}) Tj ET", large_text);
let operators = ContentParser::parse(content.as_bytes()).unwrap();
assert_eq!(operators.len(), 3);
match &operators[1] {
ContentOperation::ShowText(text) => {
assert_eq!(text.len(), 10000);
}
_ => panic!("Expected ShowText operation"),
}
}
}
// Large streams (10_000 MoveTo ops plus a final Stroke) must parse without
// any size-related failure; 10_001 operations come back.
#[test]
fn test_content_stream_too_large() {
let mut large_content = Vec::new();
for i in 0..10000 {
large_content.extend_from_slice(format!("{} {} m ", i, i).as_bytes());
}
large_content.extend_from_slice(b"S");
let result = ContentParser::parse_content(&large_content);
assert!(result.is_ok());
let operations = result.unwrap();
assert!(operations.len() > 10000);
}
// If the parser tolerates an unknown operator (lenient mode), the valid
// trailing MoveTo must still be present in the output.
#[test]
fn test_invalid_operator_handling() {
let content = b"100 200 INVALID_OP 300 400 m";
let result = ContentParser::parse_content(content);
if let Ok(operations) = result {
assert!(operations
.iter()
.any(|op| matches!(op, ContentOperation::MoveTo(_, _))));
}
}
// Inconsistent bracket/paren nesting: the `]` sits inside the literal string
// `(Hello]` and the `)` inside `[World)`. Whether the parser accepts or
// rejects this input is unspecified; the original assertion
// `result.is_ok() || result.is_err()` was a tautology, so the contract
// actually exercised here is that parsing terminates without panicking.
#[test]
fn test_nested_arrays_malformed() {
    let content = b"[[(Hello] [World)]] TJ";
    let _ = ContentParser::parse_content(content);
}
// Table-driven escape-sequence round-trip through a full parse + Tj:
// control escapes, backslash, parens, octal \123 (= 'S') and \0.
#[test]
fn test_escape_sequences_in_strings() {
let test_cases = vec![
(b"(\\n\\r\\t)".as_slice(), b"\n\r\t".as_slice()),
(b"(\\\\)".as_slice(), b"\\".as_slice()),
(b"(\\(\\))".as_slice(), b"()".as_slice()),
(b"(\\123)".as_slice(), b"S".as_slice()), (b"(\\0)".as_slice(), b"\0".as_slice()),
];
for (input, expected) in test_cases {
let mut content = Vec::new();
content.extend_from_slice(input);
content.extend_from_slice(b" Tj");
let result = ContentParser::parse_content(&content);
assert!(result.is_ok());
let operations = result.unwrap();
if let ContentOperation::ShowText(text) = &operations[0] {
assert_eq!(text, expected, "Failed for input: {:?}", input);
} else {
panic!("Expected ShowText operation");
}
}
}
// Inline image with raw (unencoded) binary sample bytes between ID and EI.
// Acceptance of such short unfiltered data is implementation-defined; the
// original assertion `result.is_ok() || result.is_err()` was a tautology, so
// the contract actually exercised is that parsing terminates without
// panicking on binary payload bytes.
#[test]
fn test_content_with_inline_images() {
    let content = b"BI /W 10 /H 10 /CS /RGB ID \x00\x01\x02\x03 EI";
    let _ = ContentParser::parse_content(content);
}
// Operators invoked with an empty operand stack. Implementations may either
// report an error or skip the operator; the original per-case assertion
// `result.is_ok() || result.is_err()` was a tautology, so the contract
// actually exercised is that each case terminates without panicking.
#[test]
fn test_operator_with_missing_operands() {
    let test_cases = vec![b"Tj" as &[u8], b"m", b"rg", b"Tf"];
    for content in test_cases {
        let _ = ContentParser::parse_content(content);
    }
}
// Stray `{`/`}` delimiters (not valid content-stream syntax) must not stop
// the tokenizer from producing the surrounding operators.
#[test]
fn test_tokenizer_handles_curly_braces() {
let input = b"q { } Q";
let mut tokenizer = ContentTokenizer::new(input);
let mut tokens = Vec::new();
while let Some(token) = tokenizer.next_token().unwrap() {
tokens.push(token);
}
assert!(tokens.contains(&Token::Operator("q".to_string())));
assert!(tokens.contains(&Token::Operator("Q".to_string())));
}
// A stray unmatched `)` is likewise tolerated.
#[test]
fn test_tokenizer_handles_closing_paren() {
let input = b"q ) Q";
let mut tokenizer = ContentTokenizer::new(input);
let mut tokens = Vec::new();
while let Some(token) = tokenizer.next_token().unwrap() {
tokens.push(token);
}
assert!(tokens.contains(&Token::Operator("q".to_string())));
assert!(tokens.contains(&Token::Operator("Q".to_string())));
}
// Binary image data containing `{`/`}` bytes (0x7B/0x7D) must be consumed as
// data, and parsing must resume correctly after EI.
#[test]
fn test_inline_image_binary_with_curly_braces() {
let content = b"BI /W 2 /H 2 /BPC 8 /CS /G ID \x7B\x7D\x00\xFF EI Q";
let result = ContentParser::parse_content(content);
assert!(
result.is_ok(),
"Parsing inline image with curly braces failed: {:?}",
result.err()
);
let ops = result.unwrap();
let has_inline = ops
.iter()
.any(|op| matches!(op, ContentOperation::InlineImage { .. }));
let has_q = ops
.iter()
.any(|op| matches!(op, ContentOperation::RestoreGraphicsState));
assert!(has_inline, "Expected InlineImage operation");
assert!(has_q, "Expected RestoreGraphicsState after EI");
}
// Every byte value 0x00..=0xFF appears in the image data; none may break
// data consumption.
#[test]
fn test_inline_image_binary_with_all_byte_values() {
let mut content = Vec::new();
content.extend_from_slice(b"BI /W 16 /H 16 /BPC 8 /CS /G ID ");
for b in 0u8..=255 {
content.push(b);
}
content.extend_from_slice(b" EI Q");
let result = ContentParser::parse_content(&content);
assert!(
result.is_ok(),
"Parsing inline image with all byte values failed: {:?}",
result.err()
);
}
// The bytes "EI" (0x45 0x49) occurring INSIDE the image data must not be
// mistaken for the end-of-image marker.
#[test]
fn test_inline_image_ei_detection() {
let content = b"BI /W 2 /H 1 /BPC 8 /CS /G ID \x45\x49\x00\n EI Q";
let result = ContentParser::parse_content(content);
assert!(result.is_ok(), "EI detection failed: {:?}", result.err());
let ops = result.unwrap();
let has_inline = ops
.iter()
.any(|op| matches!(op, ContentOperation::InlineImage { .. }));
assert!(has_inline, "Expected InlineImage operation");
}
// Runs of unmatched `{`, `}` and `)` delimiters must not stall the
// tokenizer; the token-count cap acts as a watchdog against infinite loops.
#[test]
fn test_tokenizer_no_infinite_loop_on_consecutive_delimiters() {
    let input = b"q {{{}}})))) Q";
    let mut tokenizer = ContentTokenizer::new(input);
    let mut tokens = Vec::new();
    while let Some(token) = tokenizer.next_token().unwrap() {
        tokens.push(token);
        if tokens.len() > 100 {
            // Fixed mojibake in this message: the em dash had been
            // mis-encoded as an unrelated character.
            panic!("Tokenizer produced too many tokens — possible infinite loop");
        }
    }
    assert!(tokens.contains(&Token::Operator("q".to_string())));
    assert!(tokens.contains(&Token::Operator("Q".to_string())));
}
#[test]
fn test_content_parser_inline_image_produces_correct_operation() {
    // A lone BI…ID…EI sequence must yield exactly one InlineImage operation
    // with the abbreviated keys (/W, /H, /BPC) expanded to their full names.
    let content = b"BI /W 4 /H 4 /BPC 8 /CS /G ID \x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F EI";
    let parsed = ContentParser::parse_content(content);
    assert!(parsed.is_ok(), "Parse failed: {:?}", parsed.err());
    let ops = parsed.unwrap();
    assert_eq!(
        ops.len(),
        1,
        "Expected exactly 1 operation, got {}",
        ops.len()
    );
    match &ops[0] {
        ContentOperation::InlineImage { params, data } => {
            assert_eq!(params.get("Width"), Some(&Object::Integer(4)));
            assert_eq!(params.get("Height"), Some(&Object::Integer(4)));
            assert_eq!(params.get("BitsPerComponent"), Some(&Object::Integer(8)));
            assert!(!data.is_empty(), "Image data should not be empty");
        }
        other => panic!("Expected InlineImage operation, got {:?}", other),
    }
}
#[test]
fn test_octal_escape_overflow_777() {
    // \777 (= 511) exceeds a byte; only the low 8 bits survive → 0xFF.
    let mut tok = ContentTokenizer::new(b"(\\777)");
    match tok.next_token().unwrap().unwrap() {
        Token::String(bytes) => assert_eq!(bytes, vec![0xFF]),
        _ => panic!("Expected string token"),
    }
}
#[test]
fn test_octal_escape_overflow_400() {
    // \400 (= 256) overflows a byte; only the low 8 bits survive → 0x00.
    let mut tok = ContentTokenizer::new(b"(\\400)");
    match tok.next_token().unwrap().unwrap() {
        Token::String(bytes) => assert_eq!(bytes, vec![0x00]),
        _ => panic!("Expected string token"),
    }
}
#[test]
fn test_octal_escape_overflow_577() {
    // \577 (= 383) overflows a byte; only the low 8 bits survive → 0x7F.
    let mut tok = ContentTokenizer::new(b"(\\577)");
    match tok.next_token().unwrap().unwrap() {
        Token::String(bytes) => assert_eq!(bytes, vec![0x7F]),
        _ => panic!("Expected string token"),
    }
}
#[test]
fn test_octal_escape_max_valid_377() {
    // \377 (= 255) is the largest octal escape that fits a byte → 0xFF.
    let mut tok = ContentTokenizer::new(b"(\\377)");
    match tok.next_token().unwrap().unwrap() {
        Token::String(bytes) => assert_eq!(bytes, vec![0xFF]),
        _ => panic!("Expected string token"),
    }
}
#[test]
fn test_octal_escape_overflow_mixed_with_valid() {
    // Overflowing \777 (→ 0xFF) and in-range \101 (→ 'A') interleaved with
    // literal characters must each decode independently.
    let mut tok = ContentTokenizer::new(b"(A\\777B\\101C)");
    let token = tok.next_token().unwrap().unwrap();
    if let Token::String(bytes) = token {
        assert_eq!(bytes, vec![b'A', 0xFF, b'B', b'A', b'C']);
    } else {
        panic!("Expected string token");
    }
}
}