Skip to main content

pdf_objects/
types.rs

1use std::collections::BTreeMap;
2
3use serde::{Deserialize, Serialize};
4
5use crate::error::{PdfError, PdfResult};
6
/// Dictionary of [`PdfValue`]s keyed by `String`.
///
/// Backed by a `BTreeMap`, so iteration visits entries in sorted key order.
pub type PdfDictionary = BTreeMap<String, PdfValue>;
8
/// Identifier for an object: an object number paired with a generation.
///
/// Used as the key of [`PdfFile::objects`]. The derived ordering compares
/// `object_number` first and `generation` second (field declaration order).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct ObjectRef {
    /// Object number component of the identifier.
    pub object_number: u32,
    /// Generation component of the identifier.
    pub generation: u16,
}
14
15impl ObjectRef {
16    pub const fn new(object_number: u32, generation: u16) -> Self {
17        Self {
18            object_number,
19            generation,
20        }
21    }
22}
23
/// String value stored as raw bytes.
///
/// The wrapped `Vec<u8>` carries no encoding guarantee; use
/// [`PdfString::to_lossy_string`] for a best-effort textual view.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PdfString(pub Vec<u8>);
26
27impl PdfString {
28    pub fn to_lossy_string(&self) -> String {
29        String::from_utf8_lossy(&self.0).into_owned()
30    }
31}
32
/// A value that is not a stream: scalars, names, strings, containers and
/// references to other objects.
///
/// Only `PartialEq` is derived (not `Eq`) because the `Number` variant holds
/// an `f64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum PdfValue {
    /// The null value.
    Null,
    /// A boolean.
    Bool(bool),
    /// A numeric value stored as a signed 64-bit integer.
    Integer(i64),
    /// A numeric value stored as a 64-bit float.
    Number(f64),
    /// A name, stored as a `String`.
    Name(String),
    /// A string, stored as raw bytes.
    String(PdfString),
    /// An ordered sequence of values.
    Array(Vec<PdfValue>),
    /// A keyed collection of values.
    Dictionary(PdfDictionary),
    /// A reference to another object, identified by its [`ObjectRef`].
    Reference(ObjectRef),
}
45
46impl PdfValue {
47    pub fn as_name(&self) -> Option<&str> {
48        match self {
49            PdfValue::Name(value) => Some(value.as_str()),
50            _ => None,
51        }
52    }
53
54    pub fn as_bool(&self) -> Option<bool> {
55        match self {
56            PdfValue::Bool(value) => Some(*value),
57            _ => None,
58        }
59    }
60
61    pub fn as_integer(&self) -> Option<i64> {
62        match self {
63            PdfValue::Integer(value) => Some(*value),
64            PdfValue::Number(value) if value.fract() == 0.0 => Some(*value as i64),
65            _ => None,
66        }
67    }
68
69    pub fn as_number(&self) -> Option<f64> {
70        match self {
71            PdfValue::Integer(value) => Some(*value as f64),
72            PdfValue::Number(value) => Some(*value),
73            _ => None,
74        }
75    }
76
77    pub fn as_array(&self) -> Option<&[PdfValue]> {
78        match self {
79            PdfValue::Array(values) => Some(values),
80            _ => None,
81        }
82    }
83
84    pub fn as_dictionary(&self) -> Option<&PdfDictionary> {
85        match self {
86            PdfValue::Dictionary(dictionary) => Some(dictionary),
87            _ => None,
88        }
89    }
90}
91
/// A stream object: a dictionary paired with a byte payload.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PdfStream {
    /// Dictionary attached to the stream.
    pub dict: PdfDictionary,
    /// Stream payload bytes.
    // NOTE(review): whether these bytes are stored encoded or already decoded
    // is not visible from this file; confirm against the parser.
    pub data: Vec<u8>,
}
97
/// An object stored in a [`PdfFile`]: either a plain value or a stream.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum PdfObject {
    /// A non-stream value.
    Value(PdfValue),
    /// A stream (dictionary plus byte payload).
    Stream(PdfStream),
}
103
/// One entry of a cross-reference table.
///
/// Every field is an integer, so the type derives `Eq` in addition to
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum XrefEntry {
    /// The entry is marked free.
    Free,
    /// The entry is located by an offset.
    Uncompressed {
        /// Location of the object.
        // NOTE(review): presumably a byte offset from the start of the file;
        // confirm against the parser.
        offset: usize,
        /// Generation recorded for the entry.
        generation: u16,
    },
    /// The entry is located through another (stream) object.
    Compressed {
        /// Object number of the stream that holds the entry's object.
        stream_object_number: u32,
        /// Index recorded for the entry.
        // NOTE(review): presumably the position within that stream; confirm.
        index: u32,
    },
}
116
/// Cross-reference table form recorded at parse time. The serializer
/// mirrors this on save: classic-input PDFs round-trip as classic-xref,
/// xref-stream-shaped inputs round-trip as `Type /XRef` streams with
/// eligible objects packed into `Type /ObjStm` containers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum XrefForm {
    /// The input used a classic cross-reference table.
    Classic,
    /// The input used a cross-reference stream (`Type /XRef`).
    Stream,
}
126
/// In-memory representation of a PDF file: its objects, trailer and the
/// bookkeeping needed to add new objects.
#[derive(Debug, Clone)]
pub struct PdfFile {
    /// Version string of the file.
    // NOTE(review): exact format (e.g. "1.7" vs "%PDF-1.7") is not visible
    // here; confirm against the parser.
    pub version: String,
    /// All objects, keyed by their [`ObjectRef`] and iterated in key order.
    pub objects: BTreeMap<ObjectRef, PdfObject>,
    /// Trailer dictionary.
    pub trailer: PdfDictionary,
    /// Highest object number seen so far. `insert_object` raises it as needed
    /// and `allocate_object_ref` increments it to mint a fresh number.
    pub max_object_number: u32,
    /// Cross-reference form recorded for this file.
    pub xref_form: XrefForm,
}
135
136impl PdfFile {
137    pub fn get_object(&self, object_ref: ObjectRef) -> PdfResult<&PdfObject> {
138        self.objects.get(&object_ref).ok_or_else(|| {
139            PdfError::MissingObject(format!(
140                "{} {}",
141                object_ref.object_number, object_ref.generation
142            ))
143        })
144    }
145
146    pub fn get_object_mut(&mut self, object_ref: ObjectRef) -> PdfResult<&mut PdfObject> {
147        self.objects.get_mut(&object_ref).ok_or_else(|| {
148            PdfError::MissingObject(format!(
149                "{} {}",
150                object_ref.object_number, object_ref.generation
151            ))
152        })
153    }
154
155    pub fn get_value(&self, object_ref: ObjectRef) -> PdfResult<&PdfValue> {
156        match self.get_object(object_ref)? {
157            PdfObject::Value(value) => Ok(value),
158            PdfObject::Stream(_) => Err(PdfError::Corrupt(format!(
159                "expected value object at {} {}",
160                object_ref.object_number, object_ref.generation
161            ))),
162        }
163    }
164
165    pub fn get_dictionary(&self, object_ref: ObjectRef) -> PdfResult<&PdfDictionary> {
166        match self.get_value(object_ref)? {
167            PdfValue::Dictionary(dictionary) => Ok(dictionary),
168            _ => Err(PdfError::Corrupt(format!(
169                "expected dictionary at {} {}",
170                object_ref.object_number, object_ref.generation
171            ))),
172        }
173    }
174
175    pub fn resolve<'a>(&'a self, value: &'a PdfValue) -> PdfResult<&'a PdfValue> {
176        match value {
177            PdfValue::Reference(object_ref) => self.get_value(*object_ref),
178            _ => Ok(value),
179        }
180    }
181
182    pub fn resolve_dict<'a>(&'a self, value: &'a PdfValue) -> PdfResult<&'a PdfDictionary> {
183        self.resolve(value)?
184            .as_dictionary()
185            .ok_or_else(|| PdfError::Corrupt("expected dictionary value".to_string()))
186    }
187
188    pub fn allocate_object_ref(&mut self) -> ObjectRef {
189        self.max_object_number += 1;
190        ObjectRef::new(self.max_object_number, 0)
191    }
192
193    pub fn insert_object(&mut self, object_ref: ObjectRef, object: PdfObject) {
194        self.max_object_number = self.max_object_number.max(object_ref.object_number);
195        self.objects.insert(object_ref, object);
196    }
197}