pdf/
backend.rs

1use crate::error::*;
2use crate::parser::Lexer;
3use crate::parser::read_xref_and_trailer_at;
4use crate::xref::XRefTable;
5use crate::primitive::Dictionary;
6use crate::object::*;
7use std::ops::Deref;
8
9use std::ops::{
10    RangeFull,
11    RangeFrom,
12    RangeTo,
13    Range,
14};
15
/// Largest value accepted for the trailer's `Size` entry when building the xref table.
///
/// `Backend::read_xref_table_and_trailer` rejects files whose `Size` exceeds this
/// with a "too many objects" error before creating the table.
pub const MAX_ID: u32 = 1_000_000;
17
18pub trait Backend: Sized {
19    fn read<T: IndexRange>(&self, range: T) -> Result<&[u8]>;
20    //fn write<T: IndexRange>(&mut self, range: T) -> Result<&mut [u8]>;
21    fn len(&self) -> usize;
22    fn is_empty(&self) -> bool {
23        self.len() == 0
24    }
25
26    /// Returns the offset of the beginning of the file, i.e., where the `%PDF-1.5` header is.
27    /// (currently only used internally!)
28    fn locate_start_offset(&self) -> Result<usize> {
29        // Read from the beginning of the file, and look for the header.
30        // Implementation note 13 in version 1.7 of the PDF reference says that Acrobat viewers
31        // expect the header to be within the first 1KB of the file, so we do the same here.
32        const HEADER: &[u8] = b"%PDF-";
33        let buf = t!(self.read(..std::cmp::min(1024, self.len())));
34        buf
35            .windows(HEADER.len())
36            .position(|window| window == HEADER)
37            .ok_or_else(|| PdfError::Other{ msg: "file header is missing".to_string() })
38    }
39
40    /// Returns the value of startxref (currently only used internally!)
41    fn locate_xref_offset(&self) -> Result<usize> {
42        // locate the xref offset at the end of the file
43        // `\nPOS\n%%EOF` where POS is the position encoded as base 10 integer.
44        // u64::MAX has 20 digits + \n\n(2) + %%EOF(5) = 27 bytes max.
45
46        let mut lexer = Lexer::new(t!(self.read(..)));
47        lexer.set_pos_from_end(0);
48        t!(lexer.seek_substr_back(b"startxref"));
49        t!(lexer.next()).to::<usize>()
50    }
51
52    /// Used internally by File, but could also be useful for applications that want to look at the raw PDF objects.
53    fn read_xref_table_and_trailer(&self, start_offset: usize, resolve: &impl Resolve) -> Result<(XRefTable, Dictionary)> {
54        let xref_offset = t!(self.locate_xref_offset());
55        let pos = t!(start_offset.checked_add(xref_offset).ok_or(PdfError::Invalid));
56        if pos >= self.len() {
57            bail!("XRef offset outside file bounds");
58        }
59
60        let mut lexer = Lexer::with_offset(t!(self.read(pos ..)), pos);
61        
62        let (xref_sections, trailer) = t!(read_xref_and_trailer_at(&mut lexer, resolve));
63        
64        let highest_id = t!(trailer.get("Size")
65            .ok_or_else(|| PdfError::MissingEntry {field: "Size".into(), typ: "XRefTable"})?
66            .as_u32());
67
68        if highest_id > MAX_ID {
69            bail!("too many objects");
70        }
71        let mut refs = XRefTable::new(highest_id as ObjNr);
72        for section in xref_sections {
73            refs.add_entries_from(section)?;
74        }
75        
76        let mut prev_trailer = {
77            match trailer.get("Prev") {
78                Some(p) => Some(t!(p.as_usize())),
79                None => None
80            }
81        };
82        trace!("READ XREF AND TABLE");
83        let mut seen = vec![];
84        while let Some(prev_xref_offset) = prev_trailer {
85            if seen.contains(&prev_xref_offset) {
86                bail!("xref offsets loop");
87            }
88            seen.push(prev_xref_offset);
89
90            let pos = t!(start_offset.checked_add(prev_xref_offset).ok_or(PdfError::Invalid));
91            let mut lexer = Lexer::with_offset(t!(self.read(pos..)), pos);
92            let (xref_sections, trailer) = t!(read_xref_and_trailer_at(&mut lexer, resolve));
93            
94            for section in xref_sections {
95                refs.add_entries_from(section)?;
96            }
97            
98            prev_trailer = {
99                match trailer.get("Prev") {
100                    Some(p) => {
101                        let prev = t!(p.as_usize());
102                        Some(prev)
103                    }
104                    None => None
105                }
106            };
107        }
108        Ok((refs, trailer))
109    }
110}
111
112
113impl<T> Backend for T where T: Deref<Target=[u8]> { //+ DerefMut<Target=[u8]> {
114    fn read<R: IndexRange>(&self, range: R) -> Result<&[u8]> {
115        let r = t!(range.to_range(self.len()));
116        Ok(&self[r])
117    }
118    /*
119    fn write<R: IndexRange>(&mut self, range: R) -> Result<&mut [u8]> {
120        let r = range.to_range(self.len())?;
121        Ok(&mut self[r])
122    }
123    */
124    fn len(&self) -> usize {
125        (**self).len()
126    }
127}
128
129/// `IndexRange` is implemented by Rust's built-in range types, produced
130/// by range syntax like `..`, `a..`, `..b` or `c..d`.
131pub trait IndexRange
132{
133    /// Start index (inclusive)
134    fn start(&self) -> Option<usize>;
135
136    /// End index (exclusive)
137    fn end(&self) -> Option<usize>;
138
139    /// `len`: the size of whatever container that is being indexed
140    fn to_range(&self, len: usize) -> Result<Range<usize>> {
141        match (self.start(), self.end()) {
142            (None, None) => Ok(0 .. len),
143            (Some(start), None) if start <= len => Ok(start .. len),
144            (None, Some(end)) if end <= len => Ok(0 .. end),
145            (Some(start), Some(end)) if start <= end && end <= len => Ok(start .. end),
146            _ => Err(PdfError::ContentReadPastBoundary)
147        }
148    }
149}
150
151
152impl IndexRange for RangeFull {
153    #[inline]
154    fn start(&self) -> Option<usize> { None }
155    #[inline]
156    fn end(&self) -> Option<usize> { None }
157
158}
159
160impl IndexRange for RangeFrom<usize> {
161    #[inline]
162    fn start(&self) -> Option<usize> { Some(self.start) }
163    #[inline]
164    fn end(&self) -> Option<usize> { None }
165}
166
167impl IndexRange for RangeTo<usize> {
168    #[inline]
169    fn start(&self) -> Option<usize> { None }
170    #[inline]
171    fn end(&self) -> Option<usize> { Some(self.end) }
172}
173
174impl IndexRange for Range<usize> {
175    #[inline]
176    fn start(&self) -> Option<usize> { Some(self.start) }
177    #[inline]
178    fn end(&self) -> Option<usize> { Some(self.end) }
179}