1mod lexer;
4mod parse_object;
5mod parse_xref;
6
7pub use self::lexer::*;
8pub use self::parse_object::*;
9pub use self::parse_xref::*;
10
11use crate::error::*;
12use crate::primitive::StreamInner;
13use crate::primitive::{Primitive, Dictionary, PdfStream, PdfString};
14use crate::object::{ObjNr, GenNr, PlainRef, Resolve};
15use crate::crypt::Decoder;
16use bitflags::bitflags;
17use istring::{SmallBytes, SmallString, IBytes};
18
/// Nesting budget handed to the recursive parser by the public entry points
/// of this module. Entering a dictionary or array with a budget of zero makes
/// the parser fail with `PdfError::MaxDepth`.
const MAX_DEPTH: usize = 20;
20
21
22bitflags! {
23 pub struct ParseFlags: u16 {
24 const INTEGER = 1 << 0;
25 const STREAM = 1 << 1;
26 const DICT = 1 << 2;
27 const NUMBER = 1 << 3;
28 const NAME = 1 << 4;
29 const ARRAY = 1 << 5;
30 const STRING = 1 << 6;
31 const BOOL = 1 << 7;
32 const NULL = 1 << 8;
33 const REF = 1 << 9;
34 const ANY = (1 << 10) - 1;
35 }
36}
37
38
39pub struct Context<'a> {
40 pub decoder: Option<&'a Decoder>,
41 pub id: PlainRef,
42}
43impl<'a> Context<'a> {
44 pub fn decrypt<'buf>(&self, data: &'buf mut [u8]) -> Result<&'buf [u8]> {
45 if let Some(decoder) = self.decoder {
46 decoder.decrypt(self.id, data)
47 } else {
48 Ok(data)
49 }
50 }
51 #[cfg(test)]
52 fn fake() -> Self {
53 Context {
54 decoder: None,
55 id: PlainRef { id: 0, gen: 0 }
56 }
57 }
58}
59
60pub fn parse(data: &[u8], r: &impl Resolve, flags: ParseFlags) -> Result<Primitive> {
63 parse_with_lexer(&mut Lexer::new(data), r, flags)
64}
65
66pub fn parse_with_lexer(lexer: &mut Lexer, r: &impl Resolve, flags: ParseFlags) -> Result<Primitive> {
69 parse_with_lexer_ctx(lexer, r, None, flags, MAX_DEPTH)
70}
71
72fn parse_dictionary_object(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, max_depth: usize) -> Result<Dictionary> {
73 let mut dict = Dictionary::default();
74 loop {
75 let token = t!(lexer.next());
77 if token.starts_with(b"/") {
78 let key = token.reslice(1..).to_name()?;
79 let obj = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth));
80 dict.insert(key, obj);
81 } else if token.equals(b">>") {
82 break;
83 } else {
84 err!(PdfError::UnexpectedLexeme{ pos: lexer.get_pos(), lexeme: token.to_string(), expected: "/ or >>"});
85 }
86 }
87 Ok(dict)
88}
89
90fn parse_stream_object(dict: Dictionary, lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
91 t!(lexer.next_stream());
92
93 let length = match dict.get("Length") {
94 Some(&Primitive::Integer(n)) if n >= 0 => n as usize,
95 Some(&Primitive::Reference(reference)) => t!(t!(r.resolve_flags(reference, ParseFlags::INTEGER, 1)).as_usize()),
96 Some(other) => err!(PdfError::UnexpectedPrimitive { expected: "unsigned Integer or Reference", found: other.get_debug_name() }),
97 None => err!(PdfError::MissingEntry { typ: "<Stream>", field: "Length".into() }),
98 };
99
100 let stream_substr = lexer.read_n(length);
101
102 if stream_substr.len() != length {
103 err!(PdfError::EOF)
104 }
105
106 t!(lexer.next_expect("endstream"));
108
109 Ok(PdfStream {
110 inner: StreamInner::InFile {
111 id: ctx.id,
112 file_range: stream_substr.file_range(),
113 },
114 info: dict,
115 })
116}
117
118#[inline]
119fn check(flags: ParseFlags, allowed: ParseFlags) -> Result<(), PdfError> {
120 if !flags.intersects(allowed) {
121 return Err(PdfError::PrimitiveNotAllowed { allowed, found: flags });
122 }
123 Ok(())
124}
125
126pub fn parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result<Primitive> {
129 let pos = lexer.get_pos();
130 match _parse_with_lexer_ctx(lexer, r, ctx, flags, max_depth) {
131 Ok(r) => Ok(r),
132 Err(e) => {
133 lexer.set_pos(pos);
134 Err(e)
135 }
136 }
137}
138fn _parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result<Primitive> {
139
140 let input = lexer.get_remaining_slice();
141 let first_lexeme = t!(lexer.next(), std::str::from_utf8(input));
142
143 let obj = if first_lexeme.equals(b"<<") {
144 check(flags, ParseFlags::DICT)?;
145
146 if max_depth == 0 {
147 return Err(PdfError::MaxDepth);
148 }
149 let dict = t!(parse_dictionary_object(lexer, r, ctx, max_depth-1));
150 if t!(lexer.peek()).equals(b"stream") {
152 let ctx = ctx.ok_or(PdfError::PrimitiveNotAllowed { allowed: ParseFlags::STREAM, found: flags })?;
153 Primitive::Stream(t!(parse_stream_object(dict, lexer, r, ctx)))
154 } else {
155 Primitive::Dictionary(dict)
156 }
157 } else if first_lexeme.is_integer() {
158 check(flags, ParseFlags::INTEGER | ParseFlags::REF)?;
160
161 let pos_bk = lexer.get_pos();
163
164 let second_lexeme = t!(lexer.next());
165 if second_lexeme.is_integer() {
166 let third_lexeme = t!(lexer.next());
167 if third_lexeme.equals(b"R") {
168 check(flags, ParseFlags::REF)?;
170 Primitive::Reference (PlainRef {
171 id: t!(first_lexeme.to::<ObjNr>()),
172 gen: t!(second_lexeme.to::<GenNr>()),
173 })
174 } else {
175 check(flags, ParseFlags::INTEGER)?;
176 lexer.set_pos(pos_bk); Primitive::Integer(t!(first_lexeme.to::<i32>()))
179 }
180 } else {
181 check(flags, ParseFlags::INTEGER)?;
182 lexer.set_pos(pos_bk); Primitive::Integer(t!(first_lexeme.to::<i32>()))
185 }
186 } else if let Some(s) = first_lexeme.real_number() {
187 check(flags, ParseFlags::NUMBER)?;
188 Primitive::Number (t!(s.to::<f32>(), s.to_string()))
190 } else if first_lexeme.starts_with(b"/") {
191 check(flags, ParseFlags::NAME)?;
192 let mut rest: &[u8] = &first_lexeme.reslice(1..);
195 let s = if rest.contains(&b'#') {
196 let mut s = IBytes::new();
197 while let Some(idx) = rest.iter().position(|&b| b == b'#') {
198 use crate::enc::decode_nibble;
199 use std::convert::TryInto;
200 let [hi, lo]: [u8; 2] = rest.get(idx+1 .. idx+3).ok_or(PdfError::EOF)?.try_into().unwrap();
201 let byte = match (decode_nibble(lo), decode_nibble(hi)) {
202 (Some(low), Some(high)) => low | high << 4,
203 _ => return Err(PdfError::HexDecode { pos: idx, bytes: [hi, lo] }),
204 };
205 s.extend_from_slice(&rest[..idx]);
206 s.push(byte);
207 rest = &rest[idx+3..];
208 }
209 s.extend_from_slice(rest);
210 SmallBytes::from(s.as_slice())
211 } else {
212 SmallBytes::from(rest)
213 };
214
215 Primitive::Name(SmallString::from_utf8(s)?)
216 } else if first_lexeme.equals(b"[") {
217 check(flags, ParseFlags::ARRAY)?;
218 if max_depth == 0 {
219 return Err(PdfError::MaxDepth);
220 }
221 let mut array = Vec::new();
222 loop {
224 if lexer.peek()?.equals(b"]") {
226 break;
227 }
228
229 let element = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth-1));
230 array.push(element);
231 }
232 t!(lexer.next()); Primitive::Array (array)
235 } else if first_lexeme.equals(b"(") {
236 check(flags, ParseFlags::STRING)?;
237 let mut string = IBytes::new();
238
239 let bytes_traversed = {
240 let mut string_lexer = StringLexer::new(lexer.get_remaining_slice());
241 for character in string_lexer.iter() {
242 string.push(t!(character));
243 }
244 string_lexer.get_offset()
245 };
246 lexer.offset_pos(bytes_traversed);
248 if let Some(ctx) = ctx {
250 string = t!(ctx.decrypt(&mut string)).into();
251 }
252 Primitive::String (PdfString::new(string))
253 } else if first_lexeme.equals(b"<") {
254 check(flags, ParseFlags::STRING)?;
255 let mut string = IBytes::new();
256
257 let bytes_traversed = {
258 let mut hex_string_lexer = HexStringLexer::new(lexer.get_remaining_slice());
259 for byte in hex_string_lexer.iter() {
260 string.push(t!(byte));
261 }
262 hex_string_lexer.get_offset()
263 };
264 lexer.offset_pos(bytes_traversed);
266
267 if let Some(ctx) = ctx {
269 string = t!(ctx.decrypt(&mut string)).into();
270 }
271 Primitive::String (PdfString::new(string))
272 } else if first_lexeme.equals(b"true") {
273 check(flags, ParseFlags::BOOL)?;
274 Primitive::Boolean (true)
275 } else if first_lexeme.equals(b"false") {
276 check(flags, ParseFlags::BOOL)?;
277 Primitive::Boolean (false)
278 } else if first_lexeme.equals(b"null") {
279 check(flags, ParseFlags::NULL)?;
280 Primitive::Null
281 } else {
282 err!(PdfError::UnknownType {pos: lexer.get_pos(), first_lexeme: first_lexeme.to_string(), rest: lexer.read_n(50).to_string()});
283 };
284
285 Ok(obj)
288}
289
290
291pub fn parse_stream(data: &[u8], resolve: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
292 parse_stream_with_lexer(&mut Lexer::new(data), resolve, ctx)
293}
294
295
296fn parse_stream_with_lexer(lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
297 let first_lexeme = t!(lexer.next());
298
299 let obj = if first_lexeme.equals(b"<<") {
300 let dict = parse_dictionary_object(lexer, r, None, MAX_DEPTH)?;
301 if t!(lexer.peek()).equals(b"stream") {
303 let ctx = Context {
304 decoder: None,
305 id: ctx.id
306 };
307 t!(parse_stream_object(dict, lexer, r, &ctx))
308 } else {
309 err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "Dictionary" });
310 }
311 } else {
312 err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "something else" });
313 };
314
315 Ok(obj)
316}
317
#[cfg(test)]
mod tests {
    use super::lexer::Lexer;
    use super::{parse, parse_stream, parse_with_lexer, Context, ParseFlags};
    use crate::object::NoResolve;
    use crate::primitive::{PdfString, Primitive};

    /// An empty name (`/`) is accepted as a dictionary value, both in a plain
    /// dictionary and in a stream dictionary.
    #[test]
    fn dict_with_empty_name_as_value() {
        // Plain dictionary.
        let outer = parse(b"<</App<</Name/>>>>", &NoResolve, ParseFlags::DICT)
            .unwrap()
            .into_dictionary()
            .unwrap();
        assert_eq!(outer.len(), 1);
        let inner = outer.get("App").unwrap().clone().into_dictionary().unwrap();
        assert_eq!(inner.len(), 1);
        assert_eq!(inner.get("Name").unwrap().as_name().unwrap(), "");

        // Stream dictionary.
        let stream = parse_stream(
            b"<</Length 0/App<</Name/>>>>stream\nendstream\n",
            &NoResolve,
            &Context::fake(),
        )
        .unwrap();
        let info = stream.info;
        assert_eq!(info.len(), 2);
        let inner = info.get("App").unwrap().clone().into_dictionary().unwrap();
        assert_eq!(inner.len(), 1);
        assert_eq!(inner.get("Name").unwrap().as_name().unwrap(), "");
    }

    /// An empty name (`/`) is accepted as a dictionary key, both in a plain
    /// dictionary and in a stream dictionary.
    #[test]
    fn dict_with_empty_name_as_key() {
        // Plain dictionary.
        let plain = parse(b"<</ true>>", &NoResolve, ParseFlags::DICT)
            .unwrap()
            .into_dictionary()
            .unwrap();
        assert_eq!(plain.len(), 1);
        assert!(plain.get("").unwrap().as_bool().unwrap());

        // Stream dictionary.
        let stream = parse_stream(
            b"<</Length 0/ true>>stream\nendstream\n",
            &NoResolve,
            &Context::fake(),
        )
        .unwrap();
        let info = stream.info;
        assert_eq!(info.len(), 2);
        assert!(info.get("").unwrap().as_bool().unwrap());
    }

    /// `[]` parses to an array without elements.
    #[test]
    fn empty_array() {
        let parsed = parse(b"[]", &NoResolve, ParseFlags::ARRAY).unwrap();
        assert!(parsed.into_array().unwrap().is_empty());
    }

    /// Strings and integers packed without separating whitespace are split
    /// correctly, and the lexer ends up right behind the array.
    #[test]
    fn compact_array() {
        let mut lexer = Lexer::new(b"[(Complete L)20(egend for Physical and P)20(olitical Maps)]TJ");
        let expected = Primitive::Array(vec![
            Primitive::String(PdfString::new("Complete L".into())),
            Primitive::Integer(20),
            Primitive::String(PdfString::new("egend for Physical and P".into())),
            Primitive::Integer(20),
            Primitive::String(PdfString::new("olitical Maps".into())),
        ]);
        assert_eq!(
            parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::ANY).unwrap(),
            expected
        );

        // Only the operator remains, then the input is exhausted.
        assert_eq!(lexer.next().unwrap().as_str().unwrap(), "TJ");
        assert!(lexer.next().unwrap_err().is_eof());
    }
}