pdfluent_lopdf/
processor.rs1use crate::Result;
2use crate::{Document, Object, ObjectId};
3use std::collections::BTreeMap;
4use std::fs::File;
5use std::io::Write;
6
7impl Document {
8 pub fn change_producer(&mut self, producer: &str) {
10 if let Ok(info) = self.trailer.get_mut(b"Info") {
11 if let Some(dict) = match info {
12 Object::Dictionary(dict) => Some(dict),
13 Object::Reference(id) => {
14 self.objects.get_mut(id).and_then(|o| o.as_dict_mut().ok())
15 }
16 _ => None,
17 } {
18 dict.set("Producer", Object::string_literal(producer));
19 }
20 }
21 }
22
23 pub fn compress(&mut self) {
25 for object in self.objects.values_mut() {
26 if let Object::Stream(stream) = object {
27 if stream.allows_compression {
28 let _ = stream.compress();
30 }
31 }
32 }
33 }
34
35 pub fn decompress(&mut self) {
37 for object in self.objects.values_mut() {
38 if let Object::Stream(stream) = object {
39 let _ = stream.decompress();
40 }
41 }
42 }
43
44 pub fn delete_pages(&mut self, page_numbers: &[u32]) {
46 use std::collections::HashSet;
52
53 let pages = self.get_pages();
54 let ids_to_delete: HashSet<ObjectId> = page_numbers
55 .iter()
56 .filter_map(|pn| pages.get(pn).copied())
57 .collect();
58
59 if ids_to_delete.is_empty() {
60 return;
61 }
62
63 let mut count_delta: BTreeMap<ObjectId, i64> = BTreeMap::new();
65
66 for &page_id in &ids_to_delete {
67 if let Some(page_obj) = self.objects.get(&page_id) {
69 let parent_ref = page_obj
70 .as_dict()
71 .ok()
72 .and_then(|d| d.get(b"Parent").ok())
73 .and_then(|o| o.as_reference().ok());
74 let mut cur = parent_ref;
75 while let Some(tree_id) = cur {
76 *count_delta.entry(tree_id).or_insert(0) += 1;
77 cur = self
78 .objects
79 .get(&tree_id)
80 .and_then(|o| o.as_dict().ok())
81 .and_then(|d| d.get(b"Parent").ok())
82 .and_then(|o| o.as_reference().ok());
83 }
84 }
85 }
86
87 for obj in self.objects.values_mut() {
89 match obj {
90 Object::Array(arr) => {
91 arr.retain(|item| match item {
92 Object::Reference(r) => !ids_to_delete.contains(r),
93 _ => true,
94 });
95 }
96 Object::Dictionary(dict) => {
97 if let Ok(Object::Array(arr)) = dict.get_mut(b"Kids") {
98 arr.retain(|item| match item {
99 Object::Reference(r) => !ids_to_delete.contains(r),
100 _ => true,
101 });
102 }
103 }
104 _ => {}
105 }
106 }
107
108 for (tree_id, delta) in count_delta {
110 if let Some(obj) = self.objects.get_mut(&tree_id) {
111 if let Ok(dict) = obj.as_dict_mut() {
112 if let Ok(count) = dict.get(b"Count").and_then(Object::as_i64) {
113 dict.set("Count", (count - delta).max(0));
114 }
115 }
116 }
117 }
118
119 for page_id in ids_to_delete {
121 self.objects.remove(&page_id);
122 }
123 }
124
125 pub fn prune_objects(&mut self) -> Vec<ObjectId> {
127 let mut ids = vec![];
128 let refs = self.traverse_objects(|_| {});
129 for id in self.objects.keys() {
130 if !refs.contains(id) {
131 ids.push(*id);
132 }
133 }
134
135 for id in &ids {
136 self.objects.remove(id);
137 }
138
139 ids
140 }
141
142 pub fn delete_object(&mut self, id: ObjectId) -> Option<Object> {
144 let action = |object: &mut Object| match object {
145 Object::Array(array) => {
146 if let Some(index) = array.iter().position(|item: &Object| match *item {
147 Object::Reference(ref_id) => ref_id == id,
148 _ => false,
149 }) {
150 array.remove(index);
151 }
152 }
153 Object::Dictionary(dict) => {
154 let keys: Vec<Vec<u8>> = dict
155 .iter()
156 .filter(|&(_, item): &(&Vec<u8>, &Object)| match *item {
157 Object::Reference(ref_id) => ref_id == id,
158 _ => false,
159 })
160 .map(|(k, _)| k.clone())
161 .collect();
162 for key in keys {
163 dict.remove(&key);
164 }
165 }
166 _ => {}
167 };
168 self.traverse_objects(action);
169 self.objects.remove(&id)
170 }
171
172 pub fn delete_zero_length_streams(&mut self) -> Vec<ObjectId> {
174 let mut ids = vec![];
175 for id in self.objects.keys() {
176 if self
177 .objects
178 .get(id)
179 .and_then(|o| Object::as_stream(o).ok())
180 .map(|stream| stream.content.is_empty())
181 .unwrap_or(false)
182 {
183 ids.push(*id);
184 }
185 }
186
187 for id in &ids {
188 self.delete_object(*id);
189 }
190
191 ids
192 }
193
194 pub fn renumber_objects(&mut self) {
196 self.renumber_objects_with(1)
197 }
198
199 fn update_bookmark_pages(&mut self, bookmarks: &[u32], old: &ObjectId, new: &ObjectId) {
200 for id in bookmarks {
201 let (children, page) = match self.bookmark_table.get(id) {
202 Some(n) => (n.children.clone(), n.page),
203 None => return,
204 };
205
206 if page == *old {
207 let bookmark = self.bookmark_table.get_mut(id).unwrap();
208 bookmark.page = *new;
209 }
210
211 if !children.is_empty() {
212 self.update_bookmark_pages(&children[..], old, new);
213 }
214 }
215 }
216
217 pub fn renumber_bookmarks(&mut self, old: &ObjectId, new: &ObjectId) {
218 if !self.bookmarks.is_empty() {
219 self.update_bookmark_pages(&self.bookmarks.clone(), old, new);
220 }
221 }
222
223 pub fn renumber_objects_with(&mut self, starting_id: u32) {
226 let mut replace = BTreeMap::new();
227 let mut new_id = starting_id;
228 let mut i = 0;
229
230 let mut page_order: Vec<(i32, (u32, u16))> = self
234 .page_iter()
235 .map(|id| {
236 i += 1;
237 (i, id)
238 })
239 .collect();
240
241 page_order.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
242
243 i = 0;
244
245 let needs_ordering = page_order.iter().any(|a| {
246 i += 1;
247 a.0 != i
248 });
249
250 if needs_ordering {
251 let mut pages = page_order.clone();
252 pages.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
253 let mut objects = BTreeMap::new();
254
255 for (old, new) in pages.iter().zip(page_order) {
256 if let Some(object) = self.objects.remove(&old.1) {
257 objects.insert((new.1.0, old.1.1), object);
258 replace.insert(old.1, (new.1.0, old.1.1));
259 }
260
261 if old.1 != new.1 {
262 self.renumber_bookmarks(&old.1, &(new.1.0, old.1.1));
263 }
264 }
265
266 for (new, object) in objects {
267 self.objects.insert(new, object);
268 }
269
270 let action = |object: &mut Object| {
271 if let Object::Reference(id) = object {
272 if replace.contains_key(id) {
273 *id = replace[id];
274 }
275 }
276 };
277
278 self.traverse_objects(action);
279 replace.clear();
280 }
281
282 let mut ids = self.objects.keys().cloned().collect::<Vec<ObjectId>>();
283 ids.sort_unstable();
284
285 for id in ids {
286 if id.0 != new_id {
287 replace.insert(id, (new_id, id.1));
288 }
289
290 new_id += 1;
291 }
292
293 let mut objects = BTreeMap::new();
294
295 for (old, new) in &replace {
297 if let Some(object) = self.objects.remove(old) {
298 objects.insert(*new, object);
299 }
300
301 if old != new {
302 self.renumber_bookmarks(old, new);
303 }
304 }
305
306 for (new, object) in objects {
308 self.objects.insert(new, object);
309 }
310
311 let action = |object: &mut Object| {
312 if let Object::Reference(id) = object {
313 if replace.contains_key(id) {
314 *id = replace[id];
315 }
316 }
317 };
318
319 self.traverse_objects(action);
320
321 self.max_id = new_id - 1;
322 }
323
324 pub fn change_content_stream(&mut self, stream_id: ObjectId, content: Vec<u8>) {
325 if let Some(Object::Stream(stream)) = self.objects.get_mut(&stream_id) {
326 stream.set_plain_content(content);
327 let _ = stream.compress();
329 }
330 }
331
332 pub fn change_page_content(&mut self, page_id: ObjectId, content: Vec<u8>) -> Result<()> {
333 let contents = self
334 .get_dictionary(page_id)
335 .and_then(|page| page.get(b"Contents"))?;
336 match contents {
337 Object::Reference(id) => self.change_content_stream(*id, content),
338 Object::Array(arr) => {
339 if arr.len() == 1 {
340 if let Ok(id) = arr[0].as_reference() {
341 self.change_content_stream(id, content)
342 }
343 } else {
344 let new_stream = self.add_object(super::Stream::new(dictionary! {}, content));
345 if let Ok(Object::Dictionary(dict)) = self.get_object_mut(page_id) {
346 dict.set("Contents", new_stream);
347 }
348 }
349 }
350 _ => {}
351 }
352 Ok(())
353 }
354
355 pub fn extract_stream(&self, stream_id: ObjectId, decompress: bool) -> Result<()> {
356 let mut file = File::create(format!("{stream_id:?}.bin"))?;
357 if let Ok(Object::Stream(stream)) = self.get_object(stream_id) {
358 if decompress {
359 if let Ok(data) = stream.decompressed_content() {
360 file.write_all(&data)?;
361 } else {
362 file.write_all(&stream.content)?;
363 }
364 } else {
365 file.write_all(&stream.content)?;
366 }
367 }
368 Ok(())
369 }
370}