1use crate::PdfData;
4use crate::object::Object;
5use crate::page::Pages;
6use crate::page::cached::CachedPages;
7use crate::reader::Reader;
8use crate::sync::Arc;
9use crate::xref::{XRef, XRefError, fallback, root_xref};
10
11pub use crate::crypto::DecryptionError;
12use crate::metadata::Metadata;
13
14pub struct Pdf {
16 xref: Arc<XRef>,
17 header_version: PdfVersion,
18 pages: CachedPages,
19 data: PdfData,
20}
21
/// Largest number of objects a document may contain.
///
/// Loading fails with `LoadPdfError::TooLarge` when the cross-reference data
/// reports more objects than this.
pub const MAX_OBJECTS: usize = 500_000;

/// Largest number of pages a document may contain.
///
/// Loading fails with `LoadPdfError::TooLarge` when the document has more
/// pages than this.
pub const MAX_PAGES: usize = 50_000;
34
35#[derive(Debug, Copy, Clone, PartialEq, Eq)]
37pub enum LoadPdfError {
38 Decryption(DecryptionError),
40 Invalid,
42 TooLarge(usize, usize),
47}
48
49#[allow(clippy::len_without_is_empty)]
50impl Pdf {
51 pub fn new(data: impl Into<PdfData>) -> Result<Self, LoadPdfError> {
55 Self::new_with_password(data, "")
56 }
57
58 pub fn new_with_password(
62 data: impl Into<PdfData>,
63 password: &str,
64 ) -> Result<Self, LoadPdfError> {
65 let data = data.into();
66 let password = password.as_bytes();
67 let version = find_version(data.as_ref()).unwrap_or(PdfVersion::Pdf10);
68 let xref = match root_xref(data.clone(), password) {
69 Ok(x) => x,
70 Err(e) => match e {
71 XRefError::Unknown => {
72 fallback(data.clone(), password).ok_or(LoadPdfError::Invalid)?
73 }
74 XRefError::Encryption(e) => return Err(LoadPdfError::Decryption(e)),
75 },
76 };
77 let xref = Arc::new(xref);
78
79 let object_count = xref.len();
83 if object_count > MAX_OBJECTS {
84 return Err(LoadPdfError::TooLarge(object_count, 0));
85 }
86
87 let pages = CachedPages::new(xref.clone()).ok_or(LoadPdfError::Invalid)?;
88
89 let page_count = pages.get().len();
94 if page_count > MAX_PAGES {
95 return Err(LoadPdfError::TooLarge(object_count, page_count));
96 }
97
98 Ok(Self {
99 xref,
100 header_version: version,
101 pages,
102 data,
103 })
104 }
105
106 pub fn len(&self) -> usize {
108 self.xref.len()
109 }
110
111 pub fn objects(&self) -> impl IntoIterator<Item = Object<'_>> {
113 self.xref.objects()
114 }
115
116 pub fn version(&self) -> PdfVersion {
118 self.xref
119 .trailer_data()
120 .version
121 .unwrap_or(self.header_version)
122 }
123
124 pub fn data(&self) -> &PdfData {
126 &self.data
127 }
128
129 pub fn pages(&self) -> &Pages<'_> {
131 self.pages.get()
132 }
133
134 pub fn xref(&self) -> &XRef {
136 &self.xref
137 }
138
139 pub fn metadata(&self) -> &Metadata {
141 self.xref.metadata()
142 }
143}
144
145fn find_version(data: &[u8]) -> Option<PdfVersion> {
146 let data = &data[..data.len().min(2000)];
147 let mut r = Reader::new(data);
148
149 while r.forward_tag(b"%PDF-").is_none() {
150 r.read_byte()?;
151 }
152
153 PdfVersion::from_bytes(r.tail()?)
154}
155
/// The version of a PDF document.
///
/// Variants are declared in ascending order, so the derived ordering compares
/// versions chronologically (`Pdf10 < Pdf11 < … < Pdf20`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum PdfVersion {
    /// PDF 1.0.
    Pdf10,
    /// PDF 1.1.
    Pdf11,
    /// PDF 1.2.
    Pdf12,
    /// PDF 1.3.
    Pdf13,
    /// PDF 1.4.
    Pdf14,
    /// PDF 1.5.
    Pdf15,
    /// PDF 1.6.
    Pdf16,
    /// PDF 1.7.
    Pdf17,
    /// PDF 2.0.
    Pdf20,
}

impl PdfVersion {
    /// Parse a version from the first three bytes of `bytes` (e.g. `1.4`).
    ///
    /// Anything after the third byte is ignored. Returns `None` if fewer than
    /// three bytes are given or the prefix is not a known version.
    pub(crate) fn from_bytes(bytes: &[u8]) -> Option<Self> {
        match bytes {
            [b'1', b'.', minor, ..] => match *minor {
                b'0' => Some(Self::Pdf10),
                b'1' => Some(Self::Pdf11),
                b'2' => Some(Self::Pdf12),
                b'3' => Some(Self::Pdf13),
                b'4' => Some(Self::Pdf14),
                b'5' => Some(Self::Pdf15),
                b'6' => Some(Self::Pdf16),
                b'7' => Some(Self::Pdf17),
                _ => None,
            },
            [b'2', b'.', b'0', ..] => Some(Self::Pdf20),
            _ => None,
        }
    }
}
195
#[cfg(test)]
mod tests {
    use crate::pdf::{Pdf, PdfVersion};

    /// Read a file from the test corpus and load it, panicking on any failure.
    fn load_corpus_pdf(relative_path: &str) -> Pdf {
        let bytes = std::fs::read(relative_path).unwrap();
        Pdf::new(bytes).unwrap()
    }

    // Loading an empty buffer must return without panicking; the result
    // itself is deliberately discarded.
    #[test]
    fn issue_49() {
        let _ = Pdf::new(Vec::new());
    }

    // The reported version of this corpus file is expected to be 1.7.
    #[test]
    #[ignore = "requires hayro-tests corpus"]
    fn pdf_version_header() {
        let document = load_corpus_pdf("../hayro-tests/downloads/pdfjs/alphatrans.pdf");

        assert_eq!(document.version(), PdfVersion::Pdf17);
    }

    // The reported version of this corpus file is expected to be 1.4.
    #[test]
    #[ignore = "requires hayro-tests corpus"]
    fn pdf_version_catalog() {
        let document = load_corpus_pdf("../hayro-tests/downloads/pdfbox/2163.pdf");

        assert_eq!(document.version(), PdfVersion::Pdf14);
    }
}