1mod lexer;
4mod parse_object;
5mod parse_xref;
6
7pub use self::lexer::*;
8pub use self::parse_object::*;
9pub use self::parse_xref::*;
10
11use crate::error::*;
12use crate::primitive::StreamInner;
13use crate::primitive::{Primitive, Dictionary, PdfStream, PdfString};
14use crate::object::{ObjNr, GenNr, PlainRef, Resolve};
15use bitflags::bitflags;
17use istring::{SmallBytes, SmallString, IBytes};
18
/// Nesting budget used by the entry points in this module (`parse_with_lexer`
/// and `parse_stream_with_lexer`). Every nested dictionary or array spends one
/// level; once the budget reaches zero the parser returns `PdfError::MaxDepth`.
const MAX_DEPTH: usize = 20;
20
21
bitflags! {
    /// Set of primitive kinds involved in a parse request.
    ///
    /// Callers pass the kinds they are willing to receive; the parser tests the
    /// kind it actually encountered against that set via `check`.
    #[repr(transparent)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub struct ParseFlags: u16 {
        /// Integer primitive (`Primitive::Integer`).
        const INTEGER = 1 << 0;
        /// Stream primitive. In this file it only appears in the error raised
        /// when a stream is encountered without a `Context`.
        const STREAM = 1 << 1;
        /// Dictionary primitive (`<< ... >>`).
        const DICT = 1 << 2;
        /// Real number primitive (`Primitive::Number`).
        const NUMBER = 1 << 3;
        /// Name primitive (lexeme starting with `/`).
        const NAME = 1 << 4;
        /// Array primitive (`[ ... ]`).
        const ARRAY = 1 << 5;
        /// String primitive, both the `( ... )` and the `< ... >` form.
        const STRING = 1 << 6;
        /// Boolean primitive (`true` / `false`).
        const BOOL = 1 << 7;
        /// The `null` primitive.
        const NULL = 1 << 8;
        /// Indirect reference (`<id> <gen> R`).
        const REF = 1 << 9;
        /// All ten flags above (bits 0 through 9).
        const ANY = (1 << 10) - 1;
    }
}
39
40
/// Per-object parsing context.
///
/// The parser threads this through as `Option<&Context>`. A stream can only be
/// parsed when a context is present, because `id` is stored in the resulting
/// `StreamInner::InFile`.
pub struct Context<'a> {
    /// Reference (object number and generation) recorded in streams parsed
    /// under this context.
    pub id: PlainRef,
    // Zero-sized marker that keeps the `'a` parameter in use; nothing visible
    // here actually borrows for `'a`.
    _phantom: std::marker::PhantomData<&'a ()>,
}
45impl<'a> Context<'a> {
46 pub fn decrypt<'buf>(&self, data: &'buf mut [u8]) -> Result<&'buf [u8]> {
47 Ok(data)
49 }
50 #[cfg(test)]
51 fn fake() -> Self {
52 Context {
53 id: PlainRef { id: 0, gen: 0 },
54 _phantom: std::marker::PhantomData,
55 }
56 }
57}
58
59pub fn parse(data: &[u8], r: &impl Resolve, flags: ParseFlags) -> Result<Primitive> {
62 parse_with_lexer(&mut Lexer::new(data), r, flags)
63}
64
65pub fn parse_with_lexer(lexer: &mut Lexer, r: &impl Resolve, flags: ParseFlags) -> Result<Primitive> {
68 parse_with_lexer_ctx(lexer, r, None, flags, MAX_DEPTH)
69}
70
71fn parse_dictionary_object(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, max_depth: usize) -> Result<Dictionary> {
72 let mut dict = Dictionary::default();
73 loop {
74 let token = t!(lexer.next());
76 if token.starts_with(b"/") {
77 let key = token.reslice(1..).to_name()?;
78 let obj = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth));
79 dict.insert(key, obj);
80 } else if token.equals(b">>") {
81 break;
82 } else {
83 err!(PdfError::UnexpectedLexeme{ pos: lexer.get_pos(), lexeme: token.to_string(), expected: "/ or >>"});
84 }
85 }
86 Ok(dict)
87}
88
89fn parse_stream_object(dict: Dictionary, lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
90 t!(lexer.next_stream());
91
92 let length = match dict.get("Length") {
93 Some(&Primitive::Integer(n)) if n >= 0 => n as usize,
94 Some(&Primitive::Reference(reference)) => t!(t!(r.resolve_flags(reference, ParseFlags::INTEGER, 1)).as_usize()),
95 Some(other) => err!(PdfError::UnexpectedPrimitive { expected: "unsigned Integer or Reference", found: other.get_debug_name() }),
96 None => err!(PdfError::MissingEntry { typ: "<Stream>", field: "Length".into() }),
97 };
98
99 let stream_substr = lexer.read_n(length);
100
101 if stream_substr.len() != length {
102 err!(PdfError::EOF)
103 }
104
105 t!(lexer.next_expect("endstream"));
107
108 Ok(PdfStream {
109 inner: StreamInner::InFile {
110 id: ctx.id,
111 file_range: stream_substr.file_range(),
112 },
113 info: dict,
114 })
115}
116
117#[inline]
118fn check(flags: ParseFlags, allowed: ParseFlags) -> Result<(), PdfError> {
119 if !flags.intersects(allowed) {
120 return Err(PdfError::PrimitiveNotAllowed { allowed, found: flags });
121 }
122 Ok(())
123}
124
125pub fn parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result<Primitive> {
128 let pos = lexer.get_pos();
129 match _parse_with_lexer_ctx(lexer, r, ctx, flags, max_depth) {
130 Ok(r) => Ok(r),
131 Err(e) => {
132 lexer.set_pos(pos);
133 Err(e)
134 }
135 }
136}
137fn _parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result<Primitive> {
138
139 let input = lexer.get_remaining_slice();
140 let first_lexeme = t!(lexer.next(), std::str::from_utf8(input));
141
142 let obj = if first_lexeme.equals(b"<<") {
143 check(flags, ParseFlags::DICT)?;
144
145 if max_depth == 0 {
146 return Err(PdfError::MaxDepth);
147 }
148 let dict = t!(parse_dictionary_object(lexer, r, ctx, max_depth-1));
149 if t!(lexer.peek()).equals(b"stream") {
151 let ctx = ctx.ok_or(PdfError::PrimitiveNotAllowed { allowed: ParseFlags::STREAM, found: flags })?;
152 Primitive::Stream(t!(parse_stream_object(dict, lexer, r, ctx)))
153 } else {
154 Primitive::Dictionary(dict)
155 }
156 } else if first_lexeme.is_integer() {
157 check(flags, ParseFlags::INTEGER | ParseFlags::REF)?;
159
160 let pos_bk = lexer.get_pos();
162
163 let second_lexeme = t!(lexer.next());
164 if second_lexeme.is_integer() {
165 let third_lexeme = t!(lexer.next());
166 if third_lexeme.equals(b"R") {
167 check(flags, ParseFlags::REF)?;
169 Primitive::Reference (PlainRef {
170 id: t!(first_lexeme.to::<ObjNr>()),
171 gen: t!(second_lexeme.to::<GenNr>()),
172 })
173 } else {
174 check(flags, ParseFlags::INTEGER)?;
175 lexer.set_pos(pos_bk); Primitive::Integer(t!(first_lexeme.to::<i32>()))
178 }
179 } else {
180 check(flags, ParseFlags::INTEGER)?;
181 lexer.set_pos(pos_bk); Primitive::Integer(t!(first_lexeme.to::<i32>()))
184 }
185 } else if let Some(s) = first_lexeme.real_number() {
186 check(flags, ParseFlags::NUMBER)?;
187 Primitive::Number (t!(s.to::<f32>(), s.to_string()))
189 } else if first_lexeme.starts_with(b"/") {
190 check(flags, ParseFlags::NAME)?;
191 let mut rest: &[u8] = &first_lexeme.reslice(1..);
194 let s = if rest.contains(&b'#') {
195 let mut s = IBytes::new();
196 while let Some(idx) = rest.iter().position(|&b| b == b'#') {
197 use crate::enc::decode_nibble;
198 use std::convert::TryInto;
199 let [hi, lo]: [u8; 2] = rest.get(idx+1 .. idx+3).ok_or(PdfError::EOF)?.try_into().unwrap();
200 let byte = match (decode_nibble(lo), decode_nibble(hi)) {
201 (Some(low), Some(high)) => low | high << 4,
202 _ => return Err(PdfError::HexDecode { pos: idx, bytes: [hi, lo] }),
203 };
204 s.extend_from_slice(&rest[..idx]);
205 s.push(byte);
206 rest = &rest[idx+3..];
207 }
208 s.extend_from_slice(rest);
209 SmallBytes::from(s.as_slice())
210 } else {
211 SmallBytes::from(rest)
212 };
213
214 Primitive::Name(SmallString::from_utf8(s)?)
215 } else if first_lexeme.equals(b"[") {
216 check(flags, ParseFlags::ARRAY)?;
217 if max_depth == 0 {
218 return Err(PdfError::MaxDepth);
219 }
220 let mut array = Vec::new();
221 loop {
223 if lexer.peek()?.equals(b"]") {
225 break;
226 }
227
228 let element = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth-1));
229 array.push(element);
230 }
231 t!(lexer.next()); Primitive::Array (array)
234 } else if first_lexeme.equals(b"(") {
235 check(flags, ParseFlags::STRING)?;
236 let mut string = IBytes::new();
237
238 let bytes_traversed = {
239 let mut string_lexer = StringLexer::new(lexer.get_remaining_slice());
240 for character in string_lexer.iter() {
241 string.push(t!(character));
242 }
243 string_lexer.get_offset()
244 };
245 lexer.offset_pos(bytes_traversed);
247 if let Some(ctx) = ctx {
249 string = t!(ctx.decrypt(&mut string)).into();
250 }
251 Primitive::String (PdfString::new(string))
252 } else if first_lexeme.equals(b"<") {
253 check(flags, ParseFlags::STRING)?;
254 let mut string = IBytes::new();
255
256 let bytes_traversed = {
257 let mut hex_string_lexer = HexStringLexer::new(lexer.get_remaining_slice());
258 for byte in hex_string_lexer.iter() {
259 string.push(t!(byte));
260 }
261 hex_string_lexer.get_offset()
262 };
263 lexer.offset_pos(bytes_traversed);
265
266 if let Some(ctx) = ctx {
268 string = t!(ctx.decrypt(&mut string)).into();
269 }
270 Primitive::String (PdfString::new(string))
271 } else if first_lexeme.equals(b"true") {
272 check(flags, ParseFlags::BOOL)?;
273 Primitive::Boolean (true)
274 } else if first_lexeme.equals(b"false") {
275 check(flags, ParseFlags::BOOL)?;
276 Primitive::Boolean (false)
277 } else if first_lexeme.equals(b"null") {
278 check(flags, ParseFlags::NULL)?;
279 Primitive::Null
280 } else {
281 err!(PdfError::UnknownType {pos: lexer.get_pos(), first_lexeme: first_lexeme.to_string(), rest: lexer.read_n(50).to_string()});
282 };
283
284 Ok(obj)
287}
288
289
290pub fn parse_stream(data: &[u8], resolve: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
291 parse_stream_with_lexer(&mut Lexer::new(data), resolve, ctx)
292}
293
294
295fn parse_stream_with_lexer(lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result<PdfStream> {
296 let first_lexeme = t!(lexer.next());
297
298 let obj = if first_lexeme.equals(b"<<") {
299 let dict = t!(parse_dictionary_object(lexer, r, None, MAX_DEPTH));
300 if t!(lexer.peek()).equals(b"stream") {
302 let ctx = Context {
303 id: ctx.id,
305 _phantom: std::marker::PhantomData,
306 };
307 t!(parse_stream_object(dict, lexer, r, &ctx))
308 } else {
309 err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "Dictionary" });
310 }
311 } else {
312 err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "something else" });
313 };
314
315 Ok(obj)
316}
317
// Unit tests for the public parsing entry points of this module.
#[cfg(test)]
mod tests {
    // A bare `/` directly followed by a delimiter must parse as a name with
    // empty text when it appears as a dictionary value.
    #[test]
    fn dict_with_empty_name_as_value() {
        use crate::object::NoResolve;
        use super::{ParseFlags, Context};
        // Plain dictionary.
        {
            let data = b"<</App<</Name/>>>>";
            let primitive = super::parse(data, &NoResolve, ParseFlags::DICT).unwrap();
            let dict = primitive.into_dictionary().unwrap();

            assert_eq!(dict.len(), 1);
            let app_dict = dict.get("App").unwrap().clone().into_dictionary().unwrap();
            assert_eq!(app_dict.len(), 1);
            let name = app_dict.get("Name").unwrap().as_name().unwrap();
            assert_eq!(name, "");
        }

        // Same dictionary as the header of an empty stream.
        {
            let data = b"<</Length 0/App<</Name/>>>>stream\nendstream\n";
            let stream = super::parse_stream(data, &NoResolve, &Context::fake()).unwrap();
            let dict = stream.info;

            // `Length` plus `App`.
            assert_eq!(dict.len(), 2);
            let app_dict = dict.get("App").unwrap().clone().into_dictionary().unwrap();
            assert_eq!(app_dict.len(), 1);
            let name = app_dict.get("Name").unwrap().as_name().unwrap();
            assert_eq!(name, "");
        }
    }

    // A bare `/` in key position must be accepted as the empty name.
    #[test]
    fn dict_with_empty_name_as_key() {
        use crate::object::NoResolve;
        use super::{ParseFlags, Context};

        // Plain dictionary.
        {
            let data = b"<</ true>>";
            let primitive = super::parse(data, &NoResolve, ParseFlags::DICT).unwrap();
            let dict = primitive.into_dictionary().unwrap();

            assert_eq!(dict.len(), 1);
            assert!(dict.get("").unwrap().as_bool().unwrap());
        }

        // Same entry inside the header of an empty stream.
        {
            let data = b"<</Length 0/ true>>stream\nendstream\n";
            let stream = super::parse_stream(data, &NoResolve, &Context::fake()).unwrap();
            let dict = stream.info;

            // `Length` plus the empty-named entry.
            assert_eq!(dict.len(), 2);
            assert!(dict.get("").unwrap().as_bool().unwrap());
        }
    }

    // `[]` parses to an array without elements.
    #[test]
    fn empty_array() {
        use crate::object::NoResolve;
        use super::ParseFlags;

        let data = b"[]";
        let primitive = super::parse(data, &NoResolve, ParseFlags::ARRAY).unwrap();
        let array = primitive.into_array().unwrap();
        assert!(array.is_empty());
    }

    // Strings and integers written without separating whitespace must still
    // be split into individual elements, and the lexer must end up right
    // after the closing bracket.
    #[test]
    fn compact_array() {
        use crate::object::NoResolve;
        use crate::primitive::{Primitive, PdfString};
        use super::lexer::Lexer;
        use super::*;
        let mut lx = Lexer::new(b"[(Complete L)20(egend for Physical and P)20(olitical Maps)]TJ");
        assert_eq!(parse_with_lexer(&mut lx, &NoResolve, ParseFlags::ANY).unwrap(),
            Primitive::Array(vec![
                Primitive::String(PdfString::new("Complete L".into())),
                Primitive::Integer(20),
                Primitive::String(PdfString::new("egend for Physical and P".into())),
                Primitive::Integer(20),
                Primitive::String(PdfString::new("olitical Maps".into()))
            ])
        );
        // The operator after the array is the next lexeme, then the input ends.
        assert_eq!(lx.next().unwrap().as_str().unwrap(), "TJ");
        assert!(lx.next().unwrap_err().is_eof());
    }
}
403}