use crate::constants::pdf_key::{END_OBJ, END_STREAM, OBJ, R, STREAM};
use crate::constants::*;
use crate::error::{PDFError, Result};
use crate::objects::{Dictionary, PDFNumber, PDFObject, PDFStrKind, PDFString, Stream, XEntry};
use crate::tokenizer::Token::{Delimiter, Id, Key, Number};
use crate::tokenizer::{Token, Tokenizer};
use std::collections::HashMap;
use crate::error::PDFError::{EOFError, PDFParseError, PDFParseError0};
use crate::utils::hex2bytes;
/// Repositions the tokenizer at `offset` and parses the object found there.
///
/// # Errors
/// Propagates any error from `Tokenizer::seek` or from [`parse`].
pub(crate) fn parse_with_offset(tokenizer: &mut Tokenizer, offset: u64) -> Result<PDFObject> {
    tokenizer.seek(offset)?;
    let object = parse(tokenizer)?;
    Ok(object)
}
/// Parses a single object starting at the tokenizer's current position.
///
/// Reads one token and hands it to `parser0`, which decides what kind of
/// object that token introduces.
///
/// # Errors
/// Propagates tokenizer errors and any parse error raised by `parser0`.
pub(crate) fn parse(tokenizer: &mut Tokenizer) -> Result<PDFObject> {
    let first = tokenizer.next_token()?;
    parser0(tokenizer, first)
}
fn parser0(tokenizer: &mut Tokenizer, token: Token) -> Result<PDFObject> {
match token {
Delimiter(delimiter) => match delimiter.as_str() {
"<<" => {
let dict = parse_dict(tokenizer)?;
if tokenizer.check_next_token0(false, |token| token.key_was(STREAM))? {
return parse_stream(tokenizer, dict);
}
Ok(PDFObject::Dict(dict))
}
"[" => parse_array(tokenizer),
"/" => parse_named(tokenizer),
"<" | "(" => parse_string(tokenizer, delimiter == "("),
_ => Err(PDFParseError0(format!("Delimiter '{}' not implemented", delimiter))),
},
Key(key) => match key.as_str() {
pdf_key::NULL => Ok(PDFObject::Null),
pdf_key::TURE => Ok(PDFObject::Bool(true)),
pdf_key::FALSE => Ok(PDFObject::Bool(false)),
pdf_key::TRAILER => {
let token = tokenizer.next_token()?;
parser0(tokenizer, token)
}
&_ => Err(PDFParseError0(format!("Key '{}' not implemented", key))),
}
Number(number) => match number {
PDFNumber::Unsigned(value) => {
let is_num = tokenizer.check_next_token(|token| token.is_u64())?;
if !is_num {
return Ok(PDFObject::Number(number));
}
let is_obj = tokenizer.check_next_token(|token| token.key_was(R) || token.key_was(OBJ))?;
if is_obj {
return parse_obj(tokenizer, Some(value as u32));
}
Ok(PDFObject::Number(number))
}
_ => Ok(PDFObject::Number(number))
},
Token::Eof => Err(EOFError),
_ => Err(PDFParseError0(format!("Illegal token:{}", token.to_string())))
}
}
/// Parses one subsection of a textual cross-reference table.
///
/// The subsection header is two unsigned integers: the object number of the
/// first entry and the number of entries. Each entry is then read as three
/// tokens: a 64-bit value, a 16-bit generation number and a state flag that
/// must be `n` (in use) or `f` (free). Entry `i` is assigned object number
/// `first + i`.
///
/// # Errors
/// * Propagates tokenizer and numeric-conversion errors.
/// * `PDFParseError0` when the state flag is neither `f` nor `n`.
/// * `PDFParseError0` when `first + i` does not fit in `u32`.
pub(crate) fn parse_text_xref(tokenizer: &mut Tokenizer) -> Result<Vec<XEntry>> {
    let first_obj_num = tokenizer.next_token()?.as_u32()?;
    let length = tokenizer.next_token()?.as_u32()?;
    // `length` comes straight from the file, so it is deliberately not used to
    // pre-allocate: a bogus count must not trigger a huge allocation.
    let mut entries = Vec::<XEntry>::new();
    for i in 0..length {
        let value = tokenizer.next_token()?.as_u64()?;
        let gen_num = tokenizer.next_token()?.as_u16()?;
        let state = tokenizer.next_token()?.to_string();
        let using = match state.as_str() {
            "n" => true,
            "f" => false,
            _ => {
                return Err(PDFParseError0(format!(
                    "Except a token with 'f' or 'n' but it is '{}'",
                    state
                )))
            }
        };
        // Both operands are file-controlled; a plain `+` would panic in debug
        // builds and wrap to an unrelated object number in release builds.
        let obj_num = match first_obj_num.checked_add(i) {
            Some(num) => num,
            None => {
                return Err(PDFParseError0(format!(
                    "Xref object number overflows: {} + {}",
                    first_obj_num, i
                )))
            }
        };
        entries.push(XEntry::new(obj_num, gen_num, value, using));
    }
    Ok(entries)
}
/// Parses either an indirect object definition (`N G obj ... endobj`) or an
/// object reference (`N G R`).
///
/// `option` carries the object number when the caller has already read it;
/// when it is `None` the number is taken from the next token.
///
/// # Errors
/// Propagates tokenizer, conversion and `except` failures, and returns
/// `PDFParseError` when the keyword token is not a `Key`.
fn parse_obj(tokenizer: &mut Tokenizer, option: Option<u32>) -> Result<PDFObject> {
    let obj_num = if let Some(num) = option {
        num
    } else {
        tokenizer.next_token()?.as_u32()?
    };

    // Both tokens are read before the generation number is converted, so the
    // tokenizer has advanced past the keyword even if the conversion fails.
    let gen_token = tokenizer
        .next_token()?
        .except(|candidate| candidate.is_u64())?;
    let kind_token = tokenizer
        .next_token()?
        .except(|candidate| candidate.key_was(R) || candidate.key_was(OBJ))?;
    let gen_num = gen_token.as_u16()?;

    let keyword = match kind_token {
        Key(ref keyword) => keyword.as_str(),
        _ => return Err(PDFParseError("Except a token with R or obj")),
    };

    match keyword {
        OBJ => {
            // Definition: parse the wrapped value, then require `endobj`.
            let first = tokenizer.next_token()?;
            let inner = parser0(tokenizer, first)?;
            tokenizer
                .next_token()?
                .except(|candidate| candidate.key_was(END_OBJ))?;
            Ok(PDFObject::IndirectObject(obj_num, gen_num, Box::new(inner)))
        }
        _ => Ok(PDFObject::ObjectRef(obj_num, gen_num)),
    }
}
fn parse_dict(mut tokenizer: &mut Tokenizer) -> Result<Dictionary> {
let mut entries = HashMap::<String, PDFObject>::new();
loop {
let token = tokenizer.next_token()?;
if let Delimiter(ref delimiter) = token {
if delimiter == ">>" {
break;
}
}
let object = parser0(&mut tokenizer, token)?;
if let PDFObject::Named(named) = object {
let token = tokenizer.next_token()?;
let value = parser0(&mut tokenizer, token)?;
entries.insert(named, value);
} else {
return Err(PDFParseError("Except a named token."));
}
}
Ok(Dictionary::new(entries))
}
/// Parses the identifier that follows a `/` delimiter into a name object.
///
/// # Errors
/// Returns `PDFParseError` when the next token is not an identifier, and
/// propagates tokenizer errors.
fn parse_named(tokenizer: &mut Tokenizer) -> Result<PDFObject> {
    match tokenizer.next_token()? {
        Id(name) => Ok(PDFObject::Named(name)),
        _ => Err(PDFParseError("Except a identifier token.")),
    }
}
/// Parses array elements until the closing `]` delimiter is reached.
///
/// Every token other than `]` is treated as the start of an element and
/// handed to `parser0`.
///
/// # Errors
/// Propagates tokenizer errors and any error from parsing an element.
fn parse_array(tokenizer: &mut Tokenizer) -> Result<PDFObject> {
    let mut items: Vec<PDFObject> = Vec::new();
    loop {
        match tokenizer.next_token()? {
            Delimiter(ref closing) if closing == "]" => {
                return Ok(PDFObject::Array(items));
            }
            other => {
                let element = parser0(tokenizer, other)?;
                items.push(element);
            }
        }
    }
}
/// Parses a string object whose opening delimiter has already been consumed
/// by the caller (`parser0` dispatches here after reading `(` or `<`).
///
/// * `literal_str == true`: a literal string terminated by `)`. A backslash
///   escapes the following character, and unescaped parentheses may nest:
///   the string only ends at the `)` that balances the opening delimiter.
/// * `literal_str == false`: a hexadecimal string terminated by `>`; the
///   collected text is converted with `hex2bytes`.
///
/// The collected bytes are stored as-is for literal strings; this function
/// does not decode escape sequences.
///
/// # Errors
/// Returns `PDFParseError` when the scan fails before a terminator is found.
fn parse_string(tokenizer: &mut Tokenizer, literal_str: bool) -> Result<PDFObject> {
    let end_chr = if literal_str { ')' } else { '>' };
    let mut is_escape = false;
    // Number of unescaped '(' seen inside a literal string that have not been
    // matched by a ')' yet. Always stays 0 for hexadecimal strings.
    let mut depth = 0usize;
    let result = tokenizer.loop_util(&[], |chr| {
        let m = if chr == '\\' {
            // Toggling (rather than setting) makes `\\` an escaped backslash
            // that does not escape the character after it.
            is_escape = !is_escape;
            false
        } else {
            let escaped = is_escape;
            is_escape = false;
            if escaped {
                // An escaped character never opens, closes or ends the string.
                false
            } else if literal_str && chr == '(' {
                depth += 1;
                false
            } else if chr == end_chr {
                if depth == 0 {
                    true
                } else {
                    // Closes a nested parenthesis, not the string itself.
                    depth -= 1;
                    false
                }
            } else {
                false
            }
        };
        Ok(m)
    });
    match result {
        Ok(range) => {
            let buf = tokenizer.drain_from_buf(range);
            let buf = if literal_str {
                buf
            } else {
                hex2bytes(&buf)
            };
            // NOTE(review): presumably discards the closing delimiter that the
            // scan stopped on - confirm against Tokenizer::remove_buf_len.
            tokenizer.remove_buf_len(1);
            let kind = if literal_str {
                PDFStrKind::Literal
            } else {
                PDFStrKind::Hexadecimal
            };
            let value = PDFString::new(kind, buf);
            Ok(PDFObject::String(value))
        }
        Err(_e) => Err(PDFParseError("String did not close properly")),
    }
}
/// Reads the body of a stream object whose dictionary is `metadata`.
///
/// The length is taken from the `LENGTH` entry of `metadata`, which must be a
/// direct unsigned number. After skipping the line break that follows the
/// `stream` keyword, exactly that many bytes are read and the next token must
/// be the end-of-stream keyword.
///
/// # Errors
/// * `PDFParseError` when `metadata` has no direct unsigned length entry.
/// * `PDFParseError0` when fewer or more bytes than requested were read.
/// * Any error propagated from the tokenizer.
pub(crate) fn parse_stream(tokenizer: &mut Tokenizer, metadata: Dictionary) -> Result<PDFObject> {
    let length = match metadata.get(LENGTH) {
        Some(PDFObject::Number(PDFNumber::Unsigned(declared))) => *declared as usize,
        _ => return Err(PDFParseError("Stream length is not found")),
    };

    tokenizer.skip_crlf()?;
    let buf = tokenizer.read_bytes(length)?;
    let actual = buf.len();
    if actual != length {
        return Err(PDFParseError0(format!(
            "Require Stream length is {} but it is {}",
            length, actual
        )));
    }

    let stream = Stream::new(metadata, buf);
    tokenizer
        .next_token()?
        .except(|token| token.key_was(END_STREAM))?;
    Ok(PDFObject::Stream(stream))
}