1use crate::PdfData;
4use crate::object::Object;
5use crate::page::Pages;
6use crate::page::cached::CachedPages;
7use crate::reader::Reader;
8use crate::xref::{XRef, XRefError, fallback, root_xref};
9use std::sync::Arc;
10
11pub struct Pdf {
13 xref: Arc<XRef>,
14 header_version: PdfVersion,
15 pages: CachedPages,
16 data: PdfData,
17}
18
/// The reasons why loading a PDF document with [`Pdf::new`] can fail.
///
/// The type implements [`std::error::Error`] and [`std::fmt::Display`], so it
/// can be boxed as `dyn Error` or propagated with `?` into generic error
/// types.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum LoadPdfError {
    /// Loading the cross-reference table reported that the document is
    /// encrypted.
    Encryption,
    /// The document could not be loaded: neither the regular nor the
    /// fallback cross-reference lookup succeeded, or the page cache could
    /// not be built.
    Invalid,
}

impl std::fmt::Display for LoadPdfError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::Encryption => "the PDF file is encrypted",
            Self::Invalid => "the PDF file is invalid or could not be parsed",
        };
        f.write_str(message)
    }
}

impl std::error::Error for LoadPdfError {}
27
28#[allow(clippy::len_without_is_empty)]
29impl Pdf {
30 pub fn new(data: PdfData) -> Result<Self, LoadPdfError> {
34 let version = find_version(data.as_ref().as_ref()).unwrap_or(PdfVersion::Pdf10);
35 let xref = match root_xref(data.clone()) {
36 Ok(x) => x,
37 Err(e) => match e {
38 XRefError::Unknown => fallback(data.clone()).ok_or(LoadPdfError::Invalid)?,
39 XRefError::Encrypted => return Err(LoadPdfError::Encryption),
40 },
41 };
42 let xref = Arc::new(xref);
43
44 let pages = CachedPages::new(xref.clone()).ok_or(LoadPdfError::Invalid)?;
45
46 Ok(Self {
47 xref,
48 header_version: version,
49 pages,
50 data,
51 })
52 }
53
54 pub fn len(&self) -> usize {
56 self.xref.len()
57 }
58
59 pub fn objects(&self) -> impl IntoIterator<Item = Object<'_>> {
61 self.xref.objects()
62 }
63
64 pub fn version(&self) -> PdfVersion {
66 self.xref
67 .trailer_data()
68 .version
69 .unwrap_or(self.header_version)
70 }
71
72 pub fn data(&self) -> &PdfData {
74 &self.data
75 }
76
77 pub fn pages(&self) -> &Pages<'_> {
79 self.pages.get()
80 }
81
82 pub fn xref(&self) -> &XRef {
84 &self.xref
85 }
86}
87
88fn find_version(data: &[u8]) -> Option<PdfVersion> {
89 let data = &data[..data.len().min(2000)];
90 let mut r = Reader::new(data);
91
92 while r.forward_tag(b"%PDF-").is_none() {
93 r.read_byte()?;
94 }
95
96 PdfVersion::from_bytes(r.tail()?)
97}
98
/// The version of a PDF document.
///
/// Variants are declared in ascending version order, so the derived
/// `PartialOrd`/`Ord` implementations compare versions accordingly
/// (for example `Pdf14 < Pdf17`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum PdfVersion {
    /// PDF 1.0.
    Pdf10,
    /// PDF 1.1.
    Pdf11,
    /// PDF 1.2.
    Pdf12,
    /// PDF 1.3.
    Pdf13,
    /// PDF 1.4.
    Pdf14,
    /// PDF 1.5.
    Pdf15,
    /// PDF 1.6.
    Pdf16,
    /// PDF 1.7.
    Pdf17,
    /// PDF 2.0.
    Pdf20,
}

impl PdfVersion {
    /// Parse a version from a byte string that starts with `major.minor`,
    /// such as `1.4`.
    ///
    /// Only the first three bytes are inspected; anything after them is
    /// ignored. Returns `None` when fewer than three bytes are available or
    /// when they do not spell one of the known versions.
    pub(crate) fn from_bytes(bytes: &[u8]) -> Option<PdfVersion> {
        // Split the leading `<major>.<minor>` triple into its two digits.
        let (major, minor) = match bytes {
            [major, b'.', minor, ..] => (*major, *minor),
            _ => return None,
        };

        let version = match (major, minor) {
            (b'1', b'0') => PdfVersion::Pdf10,
            (b'1', b'1') => PdfVersion::Pdf11,
            (b'1', b'2') => PdfVersion::Pdf12,
            (b'1', b'3') => PdfVersion::Pdf13,
            (b'1', b'4') => PdfVersion::Pdf14,
            (b'1', b'5') => PdfVersion::Pdf15,
            (b'1', b'6') => PdfVersion::Pdf16,
            (b'1', b'7') => PdfVersion::Pdf17,
            (b'2', b'0') => PdfVersion::Pdf20,
            _ => return None,
        };

        Some(version)
    }
}
138
#[cfg(test)]
mod tests {
    use crate::pdf::{Pdf, PdfVersion};
    use std::sync::Arc;

    /// Read the file at `path` and load it as a PDF, panicking if either
    /// step fails.
    fn load(path: &str) -> Pdf {
        let bytes = std::fs::read(path).unwrap();
        Pdf::new(Arc::new(bytes)).unwrap()
    }

    /// Loading empty input must return without panicking; the result itself
    /// is deliberately ignored.
    #[test]
    fn issue_49() {
        let empty = Arc::new([]);
        let _ = Pdf::new(empty);
    }

    /// The reported version of this fixture is expected to be 1.7.
    #[test]
    fn pdf_version_header() {
        let pdf = load("../hayro-tests/downloads/pdfjs/alphatrans.pdf");

        assert_eq!(pdf.version(), PdfVersion::Pdf17);
    }

    /// The reported version of this fixture is expected to be 1.4.
    #[test]
    fn pdf_version_catalog() {
        let pdf = load("../hayro-tests/downloads/pdfbox/2163.pdf");

        assert_eq!(pdf.version(), PdfVersion::Pdf14);
    }
}