1mod lexer;
4mod parse_object;
5mod parse_xref;
6
7pub use self::lexer::*;
8pub use self::parse_object::*;
9pub use self::parse_xref::*;
10
11use crate::error::*;
12use crate::primitive::StreamInner;
13use crate::primitive::{Primitive, Dictionary, PdfStream, PdfString};
14use crate::object::{ObjNr, GenNr, PlainRef, Resolve};
15use crate::crypt::Decoder;
16use bitflags::bitflags;
17use istring::{SmallBytes, SmallString, IBytes};
18
/// Nesting budget handed to the parser by the entry points in this module.
///
/// Every dictionary or array level consumes one unit; once the budget is
/// exhausted the parser fails with `PdfError::MaxDepth` instead of recursing further.
const MAX_DEPTH: usize = 20;
20
21
22bitflags! {
23 #[repr(transparent)]
24 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
25 pub struct ParseFlags: u16 {
26 const INTEGER = 1 << 0;
27 const STREAM = 1 << 1;
28 const DICT = 1 << 2;
29 const NUMBER = 1 << 3;
30 const NAME = 1 << 4;
31 const ARRAY = 1 << 5;
32 const STRING = 1 << 6;
33 const BOOL = 1 << 7;
34 const NULL = 1 << 8;
35 const REF = 1 << 9;
36 const ANY = (1 << 10) - 1;
37 }
38}
39
40
41pub struct Context<'a> {
42 pub decoder: Option<&'a Decoder>,
43 pub id: PlainRef,
44}
45impl<'a> Context<'a> {
46 pub fn decrypt<'buf>(&self, data: &'buf mut [u8]) -> Result<&'buf [u8]> {
47 if let Some(decoder) = self.decoder {
48 decoder.decrypt(self.id, data)
49 } else {
50 Ok(data)
51 }
52 }
53 #[cfg(test)]
54 fn fake() -> Self {
55 Context {
56 decoder: None,
57 id: PlainRef { id: 0, gen: 0 }
58 }
59 }
60}
61
62pub fn parse(data: &[u8], r: &impl Resolve, flags: ParseFlags) -> Result<Primitive> {
65 parse_with_lexer(&mut Lexer::new(data), r, flags)
66}
67
68pub fn parse_with_lexer(lexer: &mut Lexer, r: &impl Resolve, flags: ParseFlags) -> Result<Primitive> {
71 parse_with_lexer_ctx(lexer, r, None, flags, MAX_DEPTH)
72}
73
74fn parse_dictionary_object(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, max_depth: usize) -> Result<Dictionary> {
75 let mut dict = Dictionary::default();
76 loop {
77 let token = t!(lexer.next());
79 if token.starts_with(b"/") {
80 let key = token.reslice(1..).to_name()?;
81 let obj = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth));
82 dict.insert(key, obj);
83 } else if token.equals(b">>") {
84 break;
85 } else {
86 err!(PdfError::UnexpectedLexeme{ pos: lexer.get_pos(), lexeme: token.to_string(), expected: "/ or >>"});
87 }
88 }
89 Ok(dict)
90}
91
92fn parse_stream_object(dict: Dictionary, lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
93 t!(lexer.next_stream());
94
95 let length = match dict.get("Length") {
96 Some(&Primitive::Integer(n)) if n >= 0 => n as usize,
97 Some(&Primitive::Reference(reference)) => t!(t!(r.resolve_flags(reference, ParseFlags::INTEGER, 1)).as_usize()),
98 Some(other) => err!(PdfError::UnexpectedPrimitive { expected: "unsigned Integer or Reference", found: other.get_debug_name() }),
99 None => err!(PdfError::MissingEntry { typ: "<Stream>", field: "Length".into() }),
100 };
101
102 let stream_substr = lexer.read_n(length);
103
104 if stream_substr.len() != length {
105 err!(PdfError::EOF)
106 }
107
108 t!(lexer.next_expect("endstream"));
110
111 Ok(PdfStream {
112 inner: StreamInner::InFile {
113 id: ctx.id,
114 file_range: stream_substr.file_range(),
115 },
116 info: dict,
117 })
118}
119
120#[inline]
121fn check(flags: ParseFlags, allowed: ParseFlags) -> Result<(), PdfError> {
122 if !flags.intersects(allowed) {
123 return Err(PdfError::PrimitiveNotAllowed { allowed, found: flags });
124 }
125 Ok(())
126}
127
128pub fn parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result<Primitive> {
131 let pos = lexer.get_pos();
132 match _parse_with_lexer_ctx(lexer, r, ctx, flags, max_depth) {
133 Ok(r) => Ok(r),
134 Err(e) => {
135 lexer.set_pos(pos);
136 Err(e)
137 }
138 }
139}
140fn _parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result<Primitive> {
141
142 let input = lexer.get_remaining_slice();
143 let first_lexeme = t!(lexer.next(), std::str::from_utf8(input));
144
145 let obj = if first_lexeme.equals(b"<<") {
146 check(flags, ParseFlags::DICT)?;
147
148 if max_depth == 0 {
149 return Err(PdfError::MaxDepth);
150 }
151 let dict = t!(parse_dictionary_object(lexer, r, ctx, max_depth-1));
152 if t!(lexer.peek()).equals(b"stream") {
154 let ctx = ctx.ok_or(PdfError::PrimitiveNotAllowed { allowed: ParseFlags::STREAM, found: flags })?;
155 Primitive::Stream(t!(parse_stream_object(dict, lexer, r, ctx)))
156 } else {
157 Primitive::Dictionary(dict)
158 }
159 } else if first_lexeme.is_integer() {
160 check(flags, ParseFlags::INTEGER | ParseFlags::REF)?;
162
163 let pos_bk = lexer.get_pos();
165
166 let second_lexeme = t!(lexer.next());
167 if second_lexeme.is_integer() {
168 let third_lexeme = t!(lexer.next());
169 if third_lexeme.equals(b"R") {
170 check(flags, ParseFlags::REF)?;
172 Primitive::Reference (PlainRef {
173 id: t!(first_lexeme.to::<ObjNr>()),
174 gen: t!(second_lexeme.to::<GenNr>()),
175 })
176 } else {
177 check(flags, ParseFlags::INTEGER)?;
178 lexer.set_pos(pos_bk); Primitive::Integer(t!(first_lexeme.to::<i32>()))
181 }
182 } else {
183 check(flags, ParseFlags::INTEGER)?;
184 lexer.set_pos(pos_bk); Primitive::Integer(t!(first_lexeme.to::<i32>()))
187 }
188 } else if let Some(s) = first_lexeme.real_number() {
189 check(flags, ParseFlags::NUMBER)?;
190 Primitive::Number (t!(s.to::<f32>(), s.to_string()))
192 } else if first_lexeme.starts_with(b"/") {
193 check(flags, ParseFlags::NAME)?;
194 let mut rest: &[u8] = &first_lexeme.reslice(1..);
197 let s = if rest.contains(&b'#') {
198 let mut s = IBytes::new();
199 while let Some(idx) = rest.iter().position(|&b| b == b'#') {
200 use crate::enc::decode_nibble;
201 use std::convert::TryInto;
202 let [hi, lo]: [u8; 2] = rest.get(idx+1 .. idx+3).ok_or(PdfError::EOF)?.try_into().unwrap();
203 let byte = match (decode_nibble(lo), decode_nibble(hi)) {
204 (Some(low), Some(high)) => low | high << 4,
205 _ => return Err(PdfError::HexDecode { pos: idx, bytes: [hi, lo] }),
206 };
207 s.extend_from_slice(&rest[..idx]);
208 s.push(byte);
209 rest = &rest[idx+3..];
210 }
211 s.extend_from_slice(rest);
212 SmallBytes::from(s.as_slice())
213 } else {
214 SmallBytes::from(rest)
215 };
216
217 Primitive::Name(SmallString::from_utf8(s)?)
218 } else if first_lexeme.equals(b"[") {
219 check(flags, ParseFlags::ARRAY)?;
220 if max_depth == 0 {
221 return Err(PdfError::MaxDepth);
222 }
223 let mut array = Vec::new();
224 loop {
226 if lexer.peek()?.equals(b"]") {
228 break;
229 }
230
231 let element = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth-1));
232 array.push(element);
233 }
234 t!(lexer.next()); Primitive::Array (array)
237 } else if first_lexeme.equals(b"(") {
238 check(flags, ParseFlags::STRING)?;
239 let mut string = IBytes::new();
240
241 let bytes_traversed = {
242 let mut string_lexer = StringLexer::new(lexer.get_remaining_slice());
243 for character in string_lexer.iter() {
244 string.push(t!(character));
245 }
246 string_lexer.get_offset()
247 };
248 lexer.offset_pos(bytes_traversed);
250 if let Some(ctx) = ctx {
252 string = t!(ctx.decrypt(&mut string)).into();
253 }
254 Primitive::String (PdfString::new(string))
255 } else if first_lexeme.equals(b"<") {
256 check(flags, ParseFlags::STRING)?;
257 let mut string = IBytes::new();
258
259 let bytes_traversed = {
260 let mut hex_string_lexer = HexStringLexer::new(lexer.get_remaining_slice());
261 for byte in hex_string_lexer.iter() {
262 string.push(t!(byte));
263 }
264 hex_string_lexer.get_offset()
265 };
266 lexer.offset_pos(bytes_traversed);
268
269 if let Some(ctx) = ctx {
271 string = t!(ctx.decrypt(&mut string)).into();
272 }
273 Primitive::String (PdfString::new(string))
274 } else if first_lexeme.equals(b"true") {
275 check(flags, ParseFlags::BOOL)?;
276 Primitive::Boolean (true)
277 } else if first_lexeme.equals(b"false") {
278 check(flags, ParseFlags::BOOL)?;
279 Primitive::Boolean (false)
280 } else if first_lexeme.equals(b"null") {
281 check(flags, ParseFlags::NULL)?;
282 Primitive::Null
283 } else {
284 err!(PdfError::UnknownType {pos: lexer.get_pos(), first_lexeme: first_lexeme.to_string(), rest: lexer.read_n(50).to_string()});
285 };
286
287 Ok(obj)
290}
291
292
293pub fn parse_stream(data: &[u8], resolve: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
294 parse_stream_with_lexer(&mut Lexer::new(data), resolve, ctx)
295}
296
297
298fn parse_stream_with_lexer(lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
299 let first_lexeme = t!(lexer.next());
300
301 let obj = if first_lexeme.equals(b"<<") {
302 let dict = t!(parse_dictionary_object(lexer, r, None, MAX_DEPTH));
303 if t!(lexer.peek()).equals(b"stream") {
305 let ctx = Context {
306 decoder: None,
307 id: ctx.id
308 };
309 t!(parse_stream_object(dict, lexer, r, &ctx))
310 } else {
311 err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "Dictionary" });
312 }
313 } else {
314 err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "something else" });
315 };
316
317 Ok(obj)
318}
319
#[cfg(test)]
mod tests {
    use crate::object::NoResolve;
    use crate::primitive::{Primitive, PdfString};
    use super::lexer::Lexer;
    use super::*;

    /// An empty name (`/`) is accepted as a dictionary value, both in a plain
    /// dictionary and in a stream dictionary.
    #[test]
    fn dict_with_empty_name_as_value() {
        // Plain dictionary.
        {
            let parsed = parse(b"<</App<</Name/>>>>", &NoResolve, ParseFlags::DICT).unwrap();
            let outer = parsed.into_dictionary().unwrap();
            assert_eq!(outer.len(), 1);

            let inner = outer.get("App").unwrap().clone().into_dictionary().unwrap();
            assert_eq!(inner.len(), 1);
            assert_eq!(inner.get("Name").unwrap().as_name().unwrap(), "");
        }

        // Stream dictionary.
        {
            let input = b"<</Length 0/App<</Name/>>>>stream\nendstream\n";
            let outer = parse_stream(input, &NoResolve, &Context::fake()).unwrap().info;
            assert_eq!(outer.len(), 2);

            let inner = outer.get("App").unwrap().clone().into_dictionary().unwrap();
            assert_eq!(inner.len(), 1);
            assert_eq!(inner.get("Name").unwrap().as_name().unwrap(), "");
        }
    }

    /// An empty name (`/`) is accepted as a dictionary key, both in a plain
    /// dictionary and in a stream dictionary.
    #[test]
    fn dict_with_empty_name_as_key() {
        // Plain dictionary.
        {
            let parsed = parse(b"<</ true>>", &NoResolve, ParseFlags::DICT).unwrap();
            let entries = parsed.into_dictionary().unwrap();

            assert_eq!(entries.len(), 1);
            assert!(entries.get("").unwrap().as_bool().unwrap());
        }

        // Stream dictionary.
        {
            let input = b"<</Length 0/ true>>stream\nendstream\n";
            let entries = parse_stream(input, &NoResolve, &Context::fake()).unwrap().info;

            assert_eq!(entries.len(), 2);
            assert!(entries.get("").unwrap().as_bool().unwrap());
        }
    }

    /// `[]` parses to an array without elements.
    #[test]
    fn empty_array() {
        let parsed = parse(b"[]", &NoResolve, ParseFlags::ARRAY).unwrap();
        assert!(parsed.into_array().unwrap().is_empty());
    }

    /// Strings and integers packed without separating whitespace are split
    /// correctly, and the lexer stops right after the closing bracket.
    #[test]
    fn compact_array() {
        let mut lexer = Lexer::new(b"[(Complete L)20(egend for Physical and P)20(olitical Maps)]TJ");

        let expected = Primitive::Array(vec![
            Primitive::String(PdfString::new("Complete L".into())),
            Primitive::Integer(20),
            Primitive::String(PdfString::new("egend for Physical and P".into())),
            Primitive::Integer(20),
            Primitive::String(PdfString::new("olitical Maps".into()))
        ]);
        assert_eq!(parse_with_lexer(&mut lexer, &NoResolve, ParseFlags::ANY).unwrap(), expected);

        // Only the operator is left, followed by end of input.
        assert_eq!(lexer.next().unwrap().as_str().unwrap(), "TJ");
        assert!(lexer.next().unwrap_err().is_eof());
    }
}