1use super::{ParseError, ParseResult};
6use super::lexer::{Lexer, Token};
7use std::collections::HashMap;
8use std::io::Read;
9
/// A PDF name object, stored as the owned text of the name.
///
/// Derives `Eq` and `Hash` so that it can serve as a `HashMap` key.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct PdfName(pub String);
13
/// A PDF string object, kept as raw bytes.
///
/// The bytes are not required to be valid UTF-8.
#[derive(Clone, Debug, PartialEq)]
pub struct PdfString(pub Vec<u8>);
17
18#[derive(Debug, Clone, PartialEq)]
20pub struct PdfArray(pub Vec<PdfObject>);
21
22#[derive(Debug, Clone, PartialEq)]
24pub struct PdfDictionary(pub HashMap<PdfName, PdfObject>);
25
26#[derive(Debug, Clone, PartialEq)]
28pub struct PdfStream {
29 pub dict: PdfDictionary,
30 pub data: Vec<u8>,
31}
32
33impl PdfStream {
34 pub fn decode(&self) -> ParseResult<Vec<u8>> {
36 super::filters::decode_stream(&self.data, &self.dict)
37 }
38
39 pub fn raw_data(&self) -> &[u8] {
41 &self.data
42 }
43}
44
45#[derive(Debug, Clone, PartialEq)]
47pub enum PdfObject {
48 Null,
49 Boolean(bool),
50 Integer(i64),
51 Real(f64),
52 String(PdfString),
53 Name(PdfName),
54 Array(PdfArray),
55 Dictionary(PdfDictionary),
56 Stream(PdfStream),
57 Reference(u32, u16), }
59
60impl PdfObject {
61 pub fn parse<R: Read>(lexer: &mut Lexer<R>) -> ParseResult<Self> {
63 let token = lexer.next_token()?;
64 Self::parse_from_token(lexer, token)
65 }
66
67 fn parse_from_token<R: Read>(lexer: &mut Lexer<R>, token: Token) -> ParseResult<Self> {
69 match token {
70 Token::Null => Ok(PdfObject::Null),
71 Token::Boolean(b) => Ok(PdfObject::Boolean(b)),
72 Token::Integer(i) => {
73 if i < 0 || i > 9999999 {
75 return Ok(PdfObject::Integer(i));
76 }
77
78 match lexer.next_token()? {
80 Token::Integer(gen) if gen >= 0 && gen <= 65535 => {
81 match lexer.next_token()? {
83 Token::Name(s) if s == "R" => {
84 Ok(PdfObject::Reference(i as u32, gen as u16))
85 }
86 token => {
87 lexer.push_token(token);
89 lexer.push_token(Token::Integer(gen));
90 Ok(PdfObject::Integer(i))
91 }
92 }
93 }
94 token => {
95 lexer.push_token(token);
97 Ok(PdfObject::Integer(i))
98 }
99 }
100 }
101 Token::Real(r) => Ok(PdfObject::Real(r)),
102 Token::String(s) => Ok(PdfObject::String(PdfString(s))),
103 Token::Name(n) => Ok(PdfObject::Name(PdfName(n))),
104 Token::ArrayStart => Self::parse_array(lexer),
105 Token::DictStart => Self::parse_dictionary_or_stream(lexer),
106 Token::Comment(_) => {
107 Self::parse(lexer)
109 }
110 Token::StartXRef => {
111 Err(ParseError::SyntaxError {
113 position: 0,
114 message: "StartXRef encountered - this is not a PDF object".to_string(),
115 })
116 }
117 Token::Eof => Err(ParseError::SyntaxError {
118 position: 0,
119 message: "Unexpected end of file".to_string(),
120 }),
121 _ => Err(ParseError::UnexpectedToken {
122 expected: "PDF object".to_string(),
123 found: format!("{:?}", token),
124 }),
125 }
126 }
127
128 fn parse_array<R: Read>(lexer: &mut Lexer<R>) -> ParseResult<Self> {
130 let mut elements = Vec::new();
131
132 loop {
133 let token = lexer.next_token()?;
134 match token {
135 Token::ArrayEnd => break,
136 Token::Comment(_) => continue, _ => {
138 let obj = Self::parse_from_token(lexer, token)?;
139 elements.push(obj);
140 }
141 }
142 }
143
144 Ok(PdfObject::Array(PdfArray(elements)))
145 }
146
147 fn parse_dictionary_or_stream<R: Read>(lexer: &mut Lexer<R>) -> ParseResult<Self> {
149 let dict = Self::parse_dictionary_inner(lexer)?;
150
151 loop {
153 let token = lexer.next_token()?;
154 match token {
156 Token::Stream => {
157 let stream_data = Self::parse_stream_data(lexer, &dict)?;
159 return Ok(PdfObject::Stream(PdfStream {
160 dict,
161 data: stream_data,
162 }));
163 }
164 Token::Comment(_) => {
165 continue;
167 }
168 Token::StartXRef => {
169 lexer.push_token(token);
173 return Ok(PdfObject::Dictionary(dict));
174 }
175 _ => {
176 lexer.push_token(token);
180 return Ok(PdfObject::Dictionary(dict));
181 }
182 }
183 }
184 }
185
186 fn parse_dictionary_inner<R: Read>(lexer: &mut Lexer<R>) -> ParseResult<PdfDictionary> {
188 let mut dict = HashMap::new();
189
190 loop {
191 let token = lexer.next_token()?;
192 match token {
193 Token::DictEnd => break,
194 Token::Comment(_) => continue, Token::Name(key) => {
196 let value = Self::parse(lexer)?;
197 dict.insert(PdfName(key), value);
198 }
199 _ => {
200 return Err(ParseError::UnexpectedToken {
201 expected: "dictionary key (name) or >>".to_string(),
202 found: format!("{:?}", token),
203 });
204 }
205 }
206 }
207
208 Ok(PdfDictionary(dict))
209 }
210
211 fn parse_stream_data<R: Read>(
213 lexer: &mut Lexer<R>,
214 dict: &PdfDictionary,
215 ) -> ParseResult<Vec<u8>> {
216 let length = dict.0.get(&PdfName("Length".to_string()))
218 .ok_or_else(|| ParseError::MissingKey("Length".to_string()))?;
219
220 let length = match length {
221 PdfObject::Integer(len) => *len as usize,
222 PdfObject::Reference(_, _) => {
223 return Err(ParseError::SyntaxError {
226 position: lexer.position(),
227 message: "Stream length references not yet supported".to_string(),
228 });
229 }
230 _ => {
231 return Err(ParseError::SyntaxError {
232 position: lexer.position(),
233 message: "Invalid stream length type".to_string(),
234 });
235 }
236 };
237
238 lexer.read_newline()?;
240
241 let stream_data = lexer.read_bytes(length)?;
243
244 lexer.skip_whitespace()?;
246
247 let token = lexer.next_token()?;
249 match token {
250 Token::EndStream => Ok(stream_data),
251 _ => Err(ParseError::UnexpectedToken {
252 expected: "endstream".to_string(),
253 found: format!("{:?}", token),
254 }),
255 }
256 }
257
258 pub fn is_null(&self) -> bool {
260 matches!(self, PdfObject::Null)
261 }
262
263 pub fn as_bool(&self) -> Option<bool> {
265 match self {
266 PdfObject::Boolean(b) => Some(*b),
267 _ => None,
268 }
269 }
270
271 pub fn as_integer(&self) -> Option<i64> {
273 match self {
274 PdfObject::Integer(i) => Some(*i),
275 _ => None,
276 }
277 }
278
279 pub fn as_real(&self) -> Option<f64> {
281 match self {
282 PdfObject::Real(r) => Some(*r),
283 PdfObject::Integer(i) => Some(*i as f64),
284 _ => None,
285 }
286 }
287
288 pub fn as_string(&self) -> Option<&PdfString> {
290 match self {
291 PdfObject::String(s) => Some(s),
292 _ => None,
293 }
294 }
295
296 pub fn as_name(&self) -> Option<&PdfName> {
298 match self {
299 PdfObject::Name(n) => Some(n),
300 _ => None,
301 }
302 }
303
304 pub fn as_array(&self) -> Option<&PdfArray> {
306 match self {
307 PdfObject::Array(a) => Some(a),
308 _ => None,
309 }
310 }
311
312 pub fn as_dict(&self) -> Option<&PdfDictionary> {
314 match self {
315 PdfObject::Dictionary(d) => Some(d),
316 PdfObject::Stream(s) => Some(&s.dict),
317 _ => None,
318 }
319 }
320
321 pub fn as_stream(&self) -> Option<&PdfStream> {
323 match self {
324 PdfObject::Stream(s) => Some(s),
325 _ => None,
326 }
327 }
328
329 pub fn as_reference(&self) -> Option<(u32, u16)> {
331 match self {
332 PdfObject::Reference(obj, gen) => Some((*obj, *gen)),
333 _ => None,
334 }
335 }
336}
337
338impl PdfDictionary {
339 pub fn new() -> Self {
341 PdfDictionary(HashMap::new())
342 }
343
344 pub fn get(&self, key: &str) -> Option<&PdfObject> {
346 self.0.get(&PdfName(key.to_string()))
347 }
348
349 pub fn insert(&mut self, key: String, value: PdfObject) {
351 self.0.insert(PdfName(key), value);
352 }
353
354 pub fn contains_key(&self, key: &str) -> bool {
356 self.0.contains_key(&PdfName(key.to_string()))
357 }
358
359 pub fn get_type(&self) -> Option<&str> {
361 self.get("Type").and_then(|obj| obj.as_name()).map(|n| n.0.as_str())
362 }
363}
364
365impl PdfArray {
366 pub fn new() -> Self {
368 PdfArray(Vec::new())
369 }
370
371 pub fn len(&self) -> usize {
373 self.0.len()
374 }
375
376 pub fn is_empty(&self) -> bool {
378 self.0.is_empty()
379 }
380
381 pub fn get(&self, index: usize) -> Option<&PdfObject> {
383 self.0.get(index)
384 }
385
386 pub fn push(&mut self, obj: PdfObject) {
388 self.0.push(obj);
389 }
390}
391
392impl PdfString {
393 pub fn new(data: Vec<u8>) -> Self {
395 PdfString(data)
396 }
397
398 pub fn as_str(&self) -> Result<&str, std::str::Utf8Error> {
400 std::str::from_utf8(&self.0)
401 }
402
403 pub fn as_bytes(&self) -> &[u8] {
405 &self.0
406 }
407}
408
409impl PdfName {
410 pub fn new(name: String) -> Self {
412 PdfName(name)
413 }
414
415 pub fn as_str(&self) -> &str {
417 &self.0
418 }
419}
420
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Whitespace-separated scalars come back one per `parse` call.
    #[test]
    fn test_parse_simple_objects() {
        // The real literal is 2.5 rather than 3.14: the latter trips the
        // deny-by-default `clippy::approx_constant` lint.
        let input = b"null true false 123 -456 2.5 /Name (Hello)";
        let mut lexer = Lexer::new(Cursor::new(input));

        let expected = vec![
            PdfObject::Null,
            PdfObject::Boolean(true),
            PdfObject::Boolean(false),
            PdfObject::Integer(123),
            PdfObject::Integer(-456),
            PdfObject::Real(2.5),
            PdfObject::Name(PdfName("Name".to_string())),
            PdfObject::String(PdfString(b"Hello".to_vec())),
        ];
        for want in expected {
            assert_eq!(PdfObject::parse(&mut lexer).unwrap(), want);
        }
    }

    /// Consecutive integers that are not followed by `R` stay plain integers,
    /// in their original order.
    #[test]
    fn test_parse_array() {
        let input = b"[100 200 300 /Name (test)]";
        let mut lexer = Lexer::new(Cursor::new(input));

        let obj = PdfObject::parse(&mut lexer).unwrap();
        let array = obj.as_array().unwrap();

        assert_eq!(array.len(), 5);
        assert_eq!(array.get(0).unwrap().as_integer(), Some(100));
        assert_eq!(array.get(1).unwrap().as_integer(), Some(200));
        assert_eq!(array.get(2).unwrap().as_integer(), Some(300));
        assert_eq!(array.get(3).unwrap().as_name().unwrap().as_str(), "Name");
        assert_eq!(array.get(4).unwrap().as_string().unwrap().as_bytes(), b"test");
        assert!(array.get(5).is_none());
    }

    /// `<object> <generation> R` triples collapse into single references with
    /// the exact object and generation numbers.
    #[test]
    fn test_parse_array_with_references() {
        let input = b"[1 0 R 2 0 R]";
        let mut lexer = Lexer::new(Cursor::new(input));

        let obj = PdfObject::parse(&mut lexer).unwrap();
        let array = obj.as_array().unwrap();

        assert_eq!(array.len(), 2);
        assert_eq!(array.get(0).unwrap().as_reference(), Some((1, 0)));
        assert_eq!(array.get(1).unwrap().as_reference(), Some((2, 0)));
    }

    /// Dictionary values may be names, references, and nested arrays.
    #[test]
    fn test_parse_dictionary() {
        let input = b"<< /Type /Page /Parent 1 0 R /MediaBox [0 0 612 792] >>";
        let mut lexer = Lexer::new(Cursor::new(input));

        let obj = PdfObject::parse(&mut lexer).unwrap();
        let dict = obj.as_dict().unwrap();

        assert_eq!(dict.get_type(), Some("Page"));
        assert_eq!(dict.get("Parent").unwrap().as_reference(), Some((1, 0)));

        let media_box = dict.get("MediaBox").unwrap().as_array().unwrap();
        let values: Vec<Option<i64>> = media_box.0.iter().map(PdfObject::as_integer).collect();
        assert_eq!(values, vec![Some(0), Some(0), Some(612), Some(792)]);

        assert!(!dict.contains_key("Missing"));
    }
}