/// Writes a formatted line to `$dst`, terminating the process on failure.
///
/// Accepts the same arguments as `writeln!`. The destination must produce
/// `std::io::Error` on failure (the error's `kind()` is inspected).
///
/// On a write error:
/// * `BrokenPipe` exits the process with status 0 without printing anything;
/// * any other error prints `Write error: <error>` to stderr and exits with
///   status 1.
#[macro_export]
macro_rules! wln {
    // `wln!(dst)` / `wln!(dst,)`: emit a bare newline.
    ($dst:expr $(,)?) => {
        if let Err(err) = writeln!($dst) {
            let code = match err.kind() {
                ::std::io::ErrorKind::BrokenPipe => 0,
                _ => {
                    eprintln!("Write error: {}", err);
                    1
                }
            };
            ::std::process::exit(code);
        }
    };
    // `wln!(dst, "fmt", args...)`: emit a formatted line.
    ($dst:expr, $($arg:tt)*) => {
        if let Err(err) = writeln!($dst, $($arg)*) {
            let code = match err.kind() {
                ::std::io::ErrorKind::BrokenPipe => 0,
                _ => {
                    eprintln!("Write error: {}", err);
                    1
                }
            };
            ::std::process::exit(code);
        }
    };
}
19
/// Writes formatted text (no trailing newline) to `$dst`, terminating the
/// process on failure.
///
/// Accepts the same arguments as `write!`. The destination must produce
/// `std::io::Error` on failure (the error's `kind()` is inspected).
///
/// On a write error:
/// * `BrokenPipe` exits the process with status 0 without printing anything;
/// * any other error prints `Write error: <error>` to stderr and exits with
///   status 1.
#[macro_export]
macro_rules! w {
    ($dst:expr, $($arg:tt)*) => {
        if let Err(err) = write!($dst, $($arg)*) {
            let code = match err.kind() {
                ::std::io::ErrorKind::BrokenPipe => 0,
                _ => {
                    eprintln!("Write error: {}", err);
                    1
                }
            };
            ::std::process::exit(code);
        }
    };
}
31
32pub(crate) mod annotations;
33pub(crate) mod bookmarks;
34pub(crate) mod embedded;
35pub(crate) mod find_text;
36pub(crate) mod fonts;
37pub(crate) mod forms;
38pub(crate) mod helpers;
39pub(crate) mod images;
40pub(crate) mod inspect;
41pub(crate) mod layers;
42pub(crate) mod object;
43pub(crate) mod operators;
44pub(crate) mod page_info;
45pub(crate) mod page_labels;
46pub(crate) mod refs;
47pub(crate) mod resources;
48pub(crate) mod search;
49pub(crate) mod security;
50pub(crate) mod stream;
51pub(crate) mod structure;
52pub(crate) mod summary;
53pub(crate) mod text;
54pub(crate) mod tree;
55pub(crate) mod types;
56pub(crate) mod validate;
57
58use clap::Parser;
59use lopdf::{Document, Object};
60use serde_json::Value;
61use std::io::{self, Write};
62
63use helpers::json_pretty;
64use types::{Args, DocMode, DumpConfig, PageSpec, ResolvedMode, StandaloneMode};
65
66pub fn run() {
67 let args = Args::parse();
68
69 let resolved = args.resolve_mode().unwrap_or_else(|e| {
70 eprintln!("Error: {}", e);
71 std::process::exit(1);
72 });
73
74 if args.raw {
76 if !matches!(
77 resolved,
78 ResolvedMode::Standalone(StandaloneMode::Object { .. })
79 ) {
80 eprintln!("Error: --raw requires --object.");
81 std::process::exit(1);
82 }
83 if args.decode {
84 eprintln!("Error: --raw and --decode cannot be used together.");
85 std::process::exit(1);
86 }
87 }
88
89 let doc = match Document::load(&args.file) {
90 Ok(doc) => doc,
91 Err(e) => {
92 eprintln!("Error: Failed to load PDF file '{}'.", args.file.display());
93 eprintln!("Reason: {}", e);
94 std::process::exit(1);
95 }
96 };
97
98 let config = DumpConfig {
99 decode: args.decode,
100 truncate: args.truncate,
101 json: args.json,
102 hex: args.hex,
103 depth: args.depth,
104 deref: args.deref,
105 raw: args.raw,
106 };
107
108 let page_spec = args.page.as_deref().map(|s| {
109 PageSpec::parse(s).unwrap_or_else(|e| {
110 eprintln!("Error: {}", e);
111 std::process::exit(1);
112 })
113 });
114
115 let mut out = io::stdout().lock();
116
117 match resolved {
118 ResolvedMode::Default => {
119 dispatch_default(&mut out, &doc, &config, page_spec.as_ref());
120 }
121 ResolvedMode::Standalone(mode) => {
122 dispatch_standalone(&mut out, &doc, &config, mode);
123 }
124 ResolvedMode::Combined(modes) => {
125 dispatch_combined(&mut out, &doc, &config, page_spec.as_ref(), &args, &modes);
126 }
127 }
128}
129
130fn dispatch_default(
131 out: &mut impl Write,
132 doc: &Document,
133 config: &DumpConfig,
134 page_spec: Option<&PageSpec>,
135) {
136 if let Some(spec) = page_spec {
137 if config.json {
138 page_info::print_page_info_json(out, doc, spec);
139 } else {
140 page_info::print_page_info(out, doc, spec);
141 }
142 } else if config.json {
143 summary::print_overview_json(out, doc, config.decode);
144 } else {
145 summary::print_overview(out, doc, config.decode);
146 }
147}
148
149fn dispatch_standalone(
150 out: &mut impl Write,
151 doc: &Document,
152 config: &DumpConfig,
153 mode: StandaloneMode,
154) {
155 match mode {
156 StandaloneMode::ExtractStream {
157 obj_num,
158 ref output,
159 } => {
160 let object_id = (obj_num, 0);
161 match doc.get_object(object_id) {
162 Ok(Object::Stream(s)) => {
163 let (decoded_content, warning) = stream::decode_stream(s);
164 if let Some(warn) = &warning {
165 eprintln!("Warning: {}", warn);
166 }
167 if let Err(e) = std::fs::write(output, &*decoded_content) {
168 eprintln!("Error writing to output file: {}", e);
169 std::process::exit(1);
170 }
171 wln!(
172 out,
173 "Successfully extracted object {} to '{}'.",
174 obj_num,
175 output.display()
176 );
177 }
178 Ok(_) => {
179 eprintln!(
180 "Error: Object {} is not a stream and cannot be extracted to a file.",
181 obj_num
182 );
183 std::process::exit(1);
184 }
185 Err(_) => {
186 eprintln!("Error: Object {} not found in the document.", obj_num);
187 std::process::exit(1);
188 }
189 }
190 }
191 StandaloneMode::Object { ref nums } => {
192 if config.json {
193 object::print_objects_json(out, doc, nums, config);
194 } else {
195 object::print_objects(out, doc, nums, config);
196 }
197 }
198 StandaloneMode::Inspect { obj_num } => {
199 if config.json {
200 inspect::print_info_json(out, doc, obj_num, config);
201 } else {
202 inspect::print_info(out, doc, obj_num);
203 }
204 }
205 StandaloneMode::Search {
206 ref expr,
207 list_modifier,
208 } => {
209 let conditions = match search::parse_search_expr(expr) {
210 Ok(c) => c,
211 Err(e) => {
212 eprintln!("Error: Invalid search expression: {}", e);
213 std::process::exit(1);
214 }
215 };
216 if config.json {
217 search::search_objects_json(out, doc, expr, &conditions, config);
218 } else {
219 search::search_objects(out, doc, &conditions, config, list_modifier);
220 }
221 }
222 }
223}
224
225fn dispatch_combined(
226 out: &mut impl Write,
227 doc: &Document,
228 config: &DumpConfig,
229 page_spec: Option<&PageSpec>,
230 args: &Args,
231 modes: &[DocMode],
232) {
233 let multi = modes.len() > 1;
234
235 if config.json {
236 if multi {
237 let mut map = serde_json::Map::new();
239 for mode in modes {
240 let value = build_mode_json_value(mode, doc, config, page_spec, args);
241 map.insert(mode.json_key().to_string(), value);
242 }
243 let output = Value::Object(map);
244 wln!(out, "{}", json_pretty(&output));
245 } else {
246 let value = build_mode_json_value(&modes[0], doc, config, page_spec, args);
248 wln!(out, "{}", json_pretty(&value));
249 }
250 } else {
251 for (i, mode) in modes.iter().enumerate() {
252 if multi {
253 if i > 0 {
254 wln!(out);
255 }
256 wln!(out, "=== {} ===", mode.label());
257 }
258 dispatch_mode_text(out, mode, doc, config, page_spec, args);
259 }
260 }
261}
262
263fn build_mode_json_value(
264 mode: &DocMode,
265 doc: &Document,
266 config: &DumpConfig,
267 page_spec: Option<&PageSpec>,
268 args: &Args,
269) -> Value {
270 match mode {
271 DocMode::List => summary::list_json_value(doc),
272 DocMode::Validate => validate::validation_json_value(doc),
273 DocMode::Fonts => fonts::fonts_json_value(doc),
274 DocMode::Images => images::images_json_value(doc),
275 DocMode::Forms => forms::forms_json_value(doc),
276 DocMode::Bookmarks => bookmarks::bookmarks_json_value(doc),
277 DocMode::Annotations => annotations::annotations_json_value(doc, page_spec),
278 DocMode::Text => text::text_json_value(doc, page_spec),
279 DocMode::Operators => operators::operators_json_value(doc, page_spec),
280 DocMode::Tags => structure::structure_json_value(doc, config),
281 DocMode::Tree => tree::tree_json_value(doc, config),
282 DocMode::FindText => {
283 find_text::find_text_json_value(doc, args.find_text.as_deref().unwrap_or(""), page_spec)
284 }
285 DocMode::Detail(sub) => match sub {
286 types::DetailSub::Security => security::security_json_value(doc, &args.file),
287 types::DetailSub::Embedded => embedded::embedded_json_value(doc),
288 types::DetailSub::Labels => page_labels::labels_json_value(doc),
289 types::DetailSub::Layers => layers::layers_json_value(doc),
290 },
291 }
292}
293
294fn dispatch_mode_text(
295 out: &mut impl Write,
296 mode: &DocMode,
297 doc: &Document,
298 config: &DumpConfig,
299 page_spec: Option<&PageSpec>,
300 args: &Args,
301) {
302 match mode {
303 DocMode::List => summary::print_list(out, doc),
304 DocMode::Validate => validate::print_validation(out, doc),
305 DocMode::Fonts => fonts::print_fonts(out, doc),
306 DocMode::Images => images::print_images(out, doc),
307 DocMode::Forms => forms::print_forms(out, doc),
308 DocMode::Bookmarks => bookmarks::print_bookmarks(out, doc),
309 DocMode::Annotations => annotations::print_annotations(out, doc, page_spec),
310 DocMode::Text => text::print_text(out, doc, page_spec),
311 DocMode::Operators => operators::print_operators(out, doc, page_spec),
312 DocMode::Tags => structure::print_structure(out, doc, config),
313 DocMode::FindText => {
314 find_text::print_find_text(out, doc, args.find_text.as_deref().unwrap_or(""), page_spec)
315 }
316 DocMode::Tree => {
317 if args.dot {
318 tree::print_tree_dot(out, doc, config);
319 } else {
320 tree::print_tree(out, doc, config);
321 }
322 }
323 DocMode::Detail(sub) => match sub {
324 types::DetailSub::Security => security::print_security(out, doc, &args.file),
325 types::DetailSub::Embedded => embedded::print_embedded_files(out, doc),
326 types::DetailSub::Labels => page_labels::print_page_labels(out, doc),
327 types::DetailSub::Layers => layers::print_layers(out, doc),
328 },
329 }
330}
331
/// Fixture builders and small helpers shared by the crate's unit tests.
#[cfg(test)]
pub(crate) mod test_utils {
    use crate::types::DumpConfig;
    use flate2::Compression;
    use flate2::write::ZlibEncoder;
    use lopdf::{Dictionary, Document, Object, ObjectId, Stream};
    use std::io::Write;

    /// Wraps raw bytes as a PDF name object.
    fn name(bytes: &[u8]) -> Object {
        Object::Name(bytes.to_vec())
    }

    /// Builds the `[0 0 612 792]` integer array used as the fixture MediaBox.
    fn fixture_media_box() -> Object {
        Object::Array(
            [0, 0, 612, 792]
                .iter()
                .map(|&coord| Object::Integer(coord))
                .collect(),
        )
    }

    /// Builds a stream with an empty dictionary holding a copy of `bytes`.
    fn content_stream(bytes: &[u8]) -> Stream {
        Stream::new(Dictionary::new(), bytes.to_vec())
    }

    /// Builds a page dictionary with `Type`, `Parent`, `Contents`,
    /// `Resources`, and `MediaBox` entries, set in that order.
    fn text_page(parent: ObjectId, contents: ObjectId, resources: ObjectId) -> Dictionary {
        let mut page = Dictionary::new();
        page.set("Type", name(b"Page"));
        page.set("Parent", Object::Reference(parent));
        page.set("Contents", Object::Reference(contents));
        page.set("Resources", Object::Reference(resources));
        page.set("MediaBox", fixture_media_box());
        page
    }

    /// Runs `f` against an in-memory buffer and returns what it wrote as a
    /// `String`. Panics if the written bytes are not valid UTF-8.
    pub fn output_of(f: impl FnOnce(&mut Vec<u8>)) -> String {
        let mut captured: Vec<u8> = Vec::new();
        f(&mut captured);
        String::from_utf8(captured).unwrap()
    }

    /// Returns a new document whose version is set to `"1.5"`.
    pub fn empty_doc() -> Document {
        let mut document = Document::new();
        document.version = String::from("1.5");
        document
    }

    /// Returns a `DumpConfig` with every flag off and every option unset.
    pub fn default_config() -> DumpConfig {
        DumpConfig {
            decode: false,
            truncate: None,
            json: false,
            hex: false,
            depth: None,
            deref: false,
            raw: false,
        }
    }

    /// Builds a stream from `content`, setting the `Filter` entry of its
    /// dictionary when `filter` is provided.
    pub fn make_stream(filter: Option<Object>, content: Vec<u8>) -> Stream {
        let mut stream_dict = Dictionary::new();
        if let Some(filter_obj) = filter {
            stream_dict.set("Filter", filter_obj);
        }
        Stream::new(stream_dict, content)
    }

    /// Compresses `data` with zlib at the default compression level.
    pub fn zlib_compress(data: &[u8]) -> Vec<u8> {
        let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default());
        zlib.write_all(data).unwrap();
        zlib.finish().unwrap()
    }

    /// Returns the default test configuration with only `json` switched on.
    pub fn json_config() -> DumpConfig {
        let mut config = default_config();
        config.json = true;
        config
    }

    /// Builds a document with two pages that share one resources dictionary
    /// (a single Helvetica font registered as `F1`). Each page has its own
    /// content stream drawing `Page1` / `Page2`.
    pub fn build_two_page_doc() -> Document {
        let mut doc = Document::new();

        // One content stream per page.
        let first_content = doc.add_object(Object::Stream(content_stream(
            b"BT /F1 12 Tf (Page1) Tj ET",
        )));
        let second_content = doc.add_object(Object::Stream(content_stream(
            b"BT /F1 12 Tf (Page2) Tj ET",
        )));

        // Shared font and resources.
        let mut helvetica = Dictionary::new();
        helvetica.set("Type", name(b"Font"));
        helvetica.set("BaseFont", name(b"Helvetica"));
        let font_id = doc.add_object(Object::Dictionary(helvetica));

        let mut font_map = Dictionary::new();
        font_map.set("F1", Object::Reference(font_id));
        let mut resources = Dictionary::new();
        resources.set("Font", Object::Dictionary(font_map));
        let resources_id = doc.add_object(Object::Dictionary(resources));

        // The page tree root is added with empty Kids; the pages need its id
        // as their Parent, so Kids is filled in once the pages exist.
        let mut page_tree = Dictionary::new();
        page_tree.set("Type", name(b"Pages"));
        page_tree.set("Count", Object::Integer(2));
        page_tree.set("Kids", Object::Array(vec![]));
        let pages_id = doc.add_object(Object::Dictionary(page_tree));

        let first_page = text_page(pages_id, first_content, resources_id);
        let first_page_id = doc.add_object(Object::Dictionary(first_page));

        let second_page = text_page(pages_id, second_content, resources_id);
        let second_page_id = doc.add_object(Object::Dictionary(second_page));

        let kids = vec![
            Object::Reference(first_page_id),
            Object::Reference(second_page_id),
        ];
        if let Ok(Object::Dictionary(tree_dict)) = doc.get_object_mut(pages_id) {
            tree_dict.set("Kids", Object::Array(kids));
        }

        let mut catalog = Dictionary::new();
        catalog.set("Type", name(b"Catalog"));
        catalog.set("Pages", Object::Reference(pages_id));
        let catalog_id = doc.add_object(Object::Dictionary(catalog));
        doc.trailer.set("Root", Object::Reference(catalog_id));

        doc
    }

    /// Builds a single-page document whose page content stream holds a copy
    /// of `content`. Objects are inserted under fixed ids: content `(1, 0)`,
    /// page `(2, 0)`, page tree `(3, 0)`, catalog `(4, 0)`.
    pub fn build_page_doc_with_content(content: &[u8]) -> Document {
        const CONTENT_ID: ObjectId = (1, 0);
        const PAGE_ID: ObjectId = (2, 0);
        const PAGES_ID: ObjectId = (3, 0);
        const CATALOG_ID: ObjectId = (4, 0);

        let mut doc = Document::new();

        doc.objects
            .insert(CONTENT_ID, Object::Stream(content_stream(content)));

        let mut page = Dictionary::new();
        page.set("Type", name(b"Page"));
        page.set("Contents", Object::Reference(CONTENT_ID));
        page.set("Parent", Object::Reference(PAGES_ID));
        doc.objects.insert(PAGE_ID, Object::Dictionary(page));

        let mut page_tree = Dictionary::new();
        page_tree.set("Type", name(b"Pages"));
        page_tree.set("Count", Object::Integer(1));
        page_tree.set("Kids", Object::Array(vec![Object::Reference(PAGE_ID)]));
        doc.objects.insert(PAGES_ID, Object::Dictionary(page_tree));

        let mut catalog = Dictionary::new();
        catalog.set("Type", name(b"Catalog"));
        catalog.set("Pages", Object::Reference(PAGES_ID));
        doc.objects.insert(CATALOG_ID, Object::Dictionary(catalog));

        doc.trailer.set("Root", Object::Reference(CATALOG_ID));
        doc
    }

    /// Inserts a page dictionary into `doc.objects` under `page_id`, with
    /// `parent_id` as its `Parent` and an `Annots` array referencing each id
    /// in `annot_ids`.
    pub fn make_page_with_annots(
        doc: &mut Document,
        page_id: ObjectId,
        parent_id: ObjectId,
        annot_ids: Vec<ObjectId>,
    ) {
        let annot_refs: Vec<Object> = annot_ids.into_iter().map(Object::Reference).collect();

        let mut page = Dictionary::new();
        page.set("Type", name(b"Page"));
        page.set("Parent", Object::Reference(parent_id));
        page.set("MediaBox", fixture_media_box());
        page.set("Annots", Object::Array(annot_refs));

        doc.objects.insert(page_id, Object::Dictionary(page));
    }
}