/// Writes a formatted line to `$dst` through `writeln!`, ending the process if the write fails.
///
/// Called with only a destination (`wln!(out)`), it writes a bare newline.
///
/// On a write error the process exits immediately: with status 0 when the error
/// kind is `BrokenPipe`, otherwise with status 1 after printing the error to stderr.
#[macro_export]
macro_rules! wln {
    ($dst:expr $(, $($arg:tt)*)?) => {
        match writeln!($dst $(, $($arg)*)?) {
            Ok(_) => {}
            // A closed pipe on the reading side is treated as a normal end of output.
            Err(e) if e.kind() == ::std::io::ErrorKind::BrokenPipe => ::std::process::exit(0),
            Err(e) => {
                eprintln!("Write error: {}", e);
                ::std::process::exit(1)
            }
        }
    };
}
19
/// Writes formatted text (no trailing newline) to `$dst` through `write!`, ending the
/// process if the write fails.
///
/// On a write error the process exits immediately: with status 0 when the error
/// kind is `BrokenPipe`, otherwise with status 1 after printing the error to stderr.
#[macro_export]
macro_rules! w {
    ($dst:expr, $($arg:tt)*) => {
        match write!($dst, $($arg)*) {
            Ok(_) => {}
            // A closed pipe on the reading side is treated as a normal end of output.
            Err(e) if e.kind() == ::std::io::ErrorKind::BrokenPipe => ::std::process::exit(0),
            Err(e) => {
                eprintln!("Write error: {}", e);
                ::std::process::exit(1)
            }
        }
    };
}
31
32pub(crate) mod types;
33pub(crate) mod stream;
34pub(crate) mod helpers;
35pub(crate) mod object;
36pub(crate) mod refs;
37pub(crate) mod summary;
38pub(crate) mod search;
39pub(crate) mod text;
40pub(crate) mod operators;
41pub(crate) mod resources;
42pub(crate) mod forms;
43pub(crate) mod fonts;
44pub(crate) mod images;
45pub(crate) mod validate;
46pub(crate) mod bookmarks;
47pub(crate) mod annotations;
48pub(crate) mod security;
49pub(crate) mod embedded;
50pub(crate) mod page_labels;
51pub(crate) mod tree;
52pub(crate) mod layers;
53pub(crate) mod structure;
54pub(crate) mod inspect;
55pub(crate) mod page_info;
56pub(crate) mod find_text;
57
58use clap::Parser;
59use lopdf::{Document, Object};
60use serde_json::Value;
61use std::io::{self, Write};
62
63use helpers::json_pretty;
64use types::{Args, DocMode, DumpConfig, PageSpec, ResolvedMode, StandaloneMode};
65
66pub fn run() {
67 let args = Args::parse();
68
69 let resolved = args.resolve_mode().unwrap_or_else(|e| {
70 eprintln!("Error: {}", e);
71 std::process::exit(1);
72 });
73
74 if args.raw {
76 if !matches!(resolved, ResolvedMode::Standalone(StandaloneMode::Object { .. })) {
77 eprintln!("Error: --raw requires --object.");
78 std::process::exit(1);
79 }
80 if args.decode {
81 eprintln!("Error: --raw and --decode cannot be used together.");
82 std::process::exit(1);
83 }
84 }
85
86 let doc = match Document::load(&args.file) {
87 Ok(doc) => doc,
88 Err(e) => {
89 eprintln!("Error: Failed to load PDF file '{}'.", args.file.display());
90 eprintln!("Reason: {}", e);
91 std::process::exit(1);
92 }
93 };
94
95 let config = DumpConfig {
96 decode: args.decode,
97 truncate: args.truncate,
98 json: args.json,
99 hex: args.hex,
100 depth: args.depth,
101 deref: args.deref,
102 raw: args.raw,
103 };
104
105 let page_spec = args.page.as_deref().map(|s| {
106 PageSpec::parse(s).unwrap_or_else(|e| {
107 eprintln!("Error: {}", e);
108 std::process::exit(1);
109 })
110 });
111
112 let mut out = io::stdout().lock();
113
114 match resolved {
115 ResolvedMode::Default => {
116 dispatch_default(&mut out, &doc, &config, page_spec.as_ref());
117 }
118 ResolvedMode::Standalone(mode) => {
119 dispatch_standalone(&mut out, &doc, &config, mode);
120 }
121 ResolvedMode::Combined(modes) => {
122 dispatch_combined(&mut out, &doc, &config, page_spec.as_ref(), &args, &modes);
123 }
124 }
125}
126
127fn dispatch_default(
128 out: &mut impl Write,
129 doc: &Document,
130 config: &DumpConfig,
131 page_spec: Option<&PageSpec>,
132) {
133 if let Some(spec) = page_spec {
134 if config.json {
135 page_info::print_page_info_json(out, doc, spec);
136 } else {
137 page_info::print_page_info(out, doc, spec);
138 }
139 } else if config.json {
140 summary::print_overview_json(out, doc, config.decode);
141 } else {
142 summary::print_overview(out, doc, config.decode);
143 }
144}
145
146fn dispatch_standalone(
147 out: &mut impl Write,
148 doc: &Document,
149 config: &DumpConfig,
150 mode: StandaloneMode,
151) {
152 match mode {
153 StandaloneMode::ExtractStream { obj_num, ref output } => {
154 let object_id = (obj_num, 0);
155 match doc.get_object(object_id) {
156 Ok(Object::Stream(s)) => {
157 let (decoded_content, warning) = stream::decode_stream(s);
158 if let Some(warn) = &warning {
159 eprintln!("Warning: {}", warn);
160 }
161 if let Err(e) = std::fs::write(output, &*decoded_content) {
162 eprintln!("Error writing to output file: {}", e);
163 std::process::exit(1);
164 }
165 wln!(out, "Successfully extracted object {} to '{}'.", obj_num, output.display());
166 }
167 Ok(_) => {
168 eprintln!("Error: Object {} is not a stream and cannot be extracted to a file.", obj_num);
169 std::process::exit(1);
170 }
171 Err(_) => {
172 eprintln!("Error: Object {} not found in the document.", obj_num);
173 std::process::exit(1);
174 }
175 }
176 }
177 StandaloneMode::Object { ref nums } => {
178 if config.json {
179 object::print_objects_json(out, doc, nums, config);
180 } else {
181 object::print_objects(out, doc, nums, config);
182 }
183 }
184 StandaloneMode::Inspect { obj_num } => {
185 if config.json {
186 inspect::print_info_json(out, doc, obj_num, config);
187 } else {
188 inspect::print_info(out, doc, obj_num);
189 }
190 }
191 StandaloneMode::Search { ref expr, list_modifier } => {
192 let conditions = match search::parse_search_expr(expr) {
193 Ok(c) => c,
194 Err(e) => {
195 eprintln!("Error: Invalid search expression: {}", e);
196 std::process::exit(1);
197 }
198 };
199 if config.json {
200 search::search_objects_json(out, doc, expr, &conditions, config);
201 } else {
202 search::search_objects(out, doc, &conditions, config, list_modifier);
203 }
204 }
205 }
206}
207
208fn dispatch_combined(
209 out: &mut impl Write,
210 doc: &Document,
211 config: &DumpConfig,
212 page_spec: Option<&PageSpec>,
213 args: &Args,
214 modes: &[DocMode],
215) {
216 let multi = modes.len() > 1;
217
218 if config.json {
219 if multi {
220 let mut map = serde_json::Map::new();
222 for mode in modes {
223 let value = build_mode_json_value(mode, doc, config, page_spec, args);
224 map.insert(mode.json_key().to_string(), value);
225 }
226 let output = Value::Object(map);
227 wln!(out, "{}", json_pretty(&output));
228 } else {
229 let value = build_mode_json_value(&modes[0], doc, config, page_spec, args);
231 wln!(out, "{}", json_pretty(&value));
232 }
233 } else {
234 for (i, mode) in modes.iter().enumerate() {
235 if multi {
236 if i > 0 {
237 wln!(out);
238 }
239 wln!(out, "=== {} ===", mode.label());
240 }
241 dispatch_mode_text(out, mode, doc, config, page_spec, args);
242 }
243 }
244}
245
246fn build_mode_json_value(
247 mode: &DocMode,
248 doc: &Document,
249 config: &DumpConfig,
250 page_spec: Option<&PageSpec>,
251 args: &Args,
252) -> Value {
253 match mode {
254 DocMode::List => summary::list_json_value(doc),
255 DocMode::Validate => validate::validation_json_value(doc),
256 DocMode::Fonts => fonts::fonts_json_value(doc),
257 DocMode::Images => images::images_json_value(doc),
258 DocMode::Forms => forms::forms_json_value(doc),
259 DocMode::Bookmarks => bookmarks::bookmarks_json_value(doc),
260 DocMode::Annotations => annotations::annotations_json_value(doc, page_spec),
261 DocMode::Text => text::text_json_value(doc, page_spec),
262 DocMode::Operators => operators::operators_json_value(doc, page_spec),
263 DocMode::Tags => structure::structure_json_value(doc, config),
264 DocMode::Tree => tree::tree_json_value(doc, config),
265 DocMode::FindText => find_text::find_text_json_value(doc, args.find_text.as_deref().unwrap_or(""), page_spec),
266 DocMode::Detail(sub) => match sub {
267 types::DetailSub::Security => security::security_json_value(doc, &args.file),
268 types::DetailSub::Embedded => embedded::embedded_json_value(doc),
269 types::DetailSub::Labels => page_labels::labels_json_value(doc),
270 types::DetailSub::Layers => layers::layers_json_value(doc),
271 },
272 }
273}
274
275fn dispatch_mode_text(
276 out: &mut impl Write,
277 mode: &DocMode,
278 doc: &Document,
279 config: &DumpConfig,
280 page_spec: Option<&PageSpec>,
281 args: &Args,
282) {
283 match mode {
284 DocMode::List => summary::print_list(out, doc),
285 DocMode::Validate => validate::print_validation(out, doc),
286 DocMode::Fonts => fonts::print_fonts(out, doc),
287 DocMode::Images => images::print_images(out, doc),
288 DocMode::Forms => forms::print_forms(out, doc),
289 DocMode::Bookmarks => bookmarks::print_bookmarks(out, doc),
290 DocMode::Annotations => annotations::print_annotations(out, doc, page_spec),
291 DocMode::Text => text::print_text(out, doc, page_spec),
292 DocMode::Operators => operators::print_operators(out, doc, page_spec),
293 DocMode::Tags => structure::print_structure(out, doc, config),
294 DocMode::FindText => find_text::print_find_text(out, doc, args.find_text.as_deref().unwrap_or(""), page_spec),
295 DocMode::Tree => {
296 if args.dot {
297 tree::print_tree_dot(out, doc, config);
298 } else {
299 tree::print_tree(out, doc, config);
300 }
301 }
302 DocMode::Detail(sub) => match sub {
303 types::DetailSub::Security => security::print_security(out, doc, &args.file),
304 types::DetailSub::Embedded => embedded::print_embedded_files(out, doc),
305 types::DetailSub::Labels => page_labels::print_page_labels(out, doc),
306 types::DetailSub::Layers => layers::print_layers(out, doc),
307 },
308 }
309}
310
/// Shared fixtures and helpers for the crate's unit tests.
#[cfg(test)]
pub(crate) mod test_utils {
    use crate::types::DumpConfig;
    use flate2::write::ZlibEncoder;
    use flate2::Compression;
    use lopdf::{Dictionary, Document, Object, Stream};
    use std::io::Write;

    /// Builds a PDF name object from the given bytes.
    fn name(bytes: &[u8]) -> Object {
        Object::Name(bytes.to_vec())
    }

    /// Builds the `[0 0 612 792]` array used as MediaBox by the page fixtures.
    fn letter_media_box() -> Object {
        Object::Array(
            [0, 0, 612, 792]
                .iter()
                .map(|&coord| Object::Integer(coord))
                .collect(),
        )
    }

    /// Builds a page dictionary with Type, Parent, Contents, Resources and
    /// MediaBox entries, set in that order.
    fn content_page(
        parent: lopdf::ObjectId,
        contents: lopdf::ObjectId,
        resources: lopdf::ObjectId,
    ) -> Dictionary {
        let mut page = Dictionary::new();
        page.set("Type", name(b"Page"));
        page.set("Parent", Object::Reference(parent));
        page.set("Contents", Object::Reference(contents));
        page.set("Resources", Object::Reference(resources));
        page.set("MediaBox", letter_media_box());
        page
    }

    /// Runs `f` against an in-memory buffer and returns everything it wrote.
    ///
    /// Panics if the written bytes are not valid UTF-8.
    pub fn output_of(f: impl FnOnce(&mut Vec<u8>)) -> String {
        let mut captured = Vec::new();
        f(&mut captured);
        String::from_utf8(captured).unwrap()
    }

    /// Returns a new document whose version is set to "1.5".
    pub fn empty_doc() -> Document {
        let mut document = Document::new();
        document.version = String::from("1.5");
        document
    }

    /// Returns a `DumpConfig` with every flag off and every optional limit unset.
    pub fn default_config() -> DumpConfig {
        DumpConfig {
            json: false,
            decode: false,
            hex: false,
            deref: false,
            raw: false,
            truncate: None,
            depth: None,
        }
    }

    /// Wraps `content` in a stream whose dictionary carries `filter` under
    /// "Filter" when one is given, and is empty otherwise.
    pub fn make_stream(filter: Option<Object>, content: Vec<u8>) -> Stream {
        let mut stream_dict = Dictionary::new();
        if let Some(filter_obj) = filter {
            stream_dict.set("Filter", filter_obj);
        }
        Stream::new(stream_dict, content)
    }

    /// Compresses `data` with zlib at the default compression level.
    pub fn zlib_compress(data: &[u8]) -> Vec<u8> {
        let mut zlib = ZlibEncoder::new(Vec::new(), Compression::default());
        zlib.write_all(data).unwrap();
        zlib.finish().unwrap()
    }

    /// Returns the same configuration as `default_config`, but with `json` enabled.
    pub fn json_config() -> DumpConfig {
        DumpConfig {
            json: true,
            ..default_config()
        }
    }

    /// Builds a two-page document whose pages share one Helvetica font resource.
    ///
    /// Objects are added in a fixed order: both content streams, the font, the
    /// resources dictionary, the page tree node, the two pages and finally the
    /// catalog, which the trailer's "Root" entry references.
    pub fn build_two_page_doc() -> Document {
        let mut doc = Document::new();

        let first_content = Stream::new(Dictionary::new(), b"BT /F1 12 Tf (Page1) Tj ET".to_vec());
        let c1_id = doc.add_object(Object::Stream(first_content));
        let second_content = Stream::new(Dictionary::new(), b"BT /F1 12 Tf (Page2) Tj ET".to_vec());
        let c2_id = doc.add_object(Object::Stream(second_content));

        let mut font = Dictionary::new();
        font.set("Type", name(b"Font"));
        font.set("BaseFont", name(b"Helvetica"));
        let font_id = doc.add_object(Object::Dictionary(font));

        let mut font_map = Dictionary::new();
        font_map.set("F1", Object::Reference(font_id));
        let mut resources = Dictionary::new();
        resources.set("Font", Object::Dictionary(font_map));
        let resources_id = doc.add_object(Object::Dictionary(resources));

        // The page tree node is added with an empty Kids array because the
        // pages need its id for their Parent entry; Kids is filled in below.
        let mut pages = Dictionary::new();
        pages.set("Type", name(b"Pages"));
        pages.set("Count", Object::Integer(2));
        pages.set("Kids", Object::Array(vec![]));
        let pages_id = doc.add_object(Object::Dictionary(pages));

        let p1_id = doc.add_object(Object::Dictionary(content_page(pages_id, c1_id, resources_id)));
        let p2_id = doc.add_object(Object::Dictionary(content_page(pages_id, c2_id, resources_id)));

        if let Ok(Object::Dictionary(page_tree)) = doc.get_object_mut(pages_id) {
            let kids = vec![Object::Reference(p1_id), Object::Reference(p2_id)];
            page_tree.set("Kids", Object::Array(kids));
        }

        let mut catalog = Dictionary::new();
        catalog.set("Type", name(b"Catalog"));
        catalog.set("Pages", Object::Reference(pages_id));
        let catalog_id = doc.add_object(Object::Dictionary(catalog));
        doc.trailer.set("Root", Object::Reference(catalog_id));

        doc
    }

    /// Builds a one-page document whose content stream holds `content`.
    ///
    /// Fixed object ids are used: (1, 0) content stream, (2, 0) page,
    /// (3, 0) page tree node, (4, 0) catalog.
    pub fn build_page_doc_with_content(content: &[u8]) -> Document {
        let content_id: lopdf::ObjectId = (1, 0);
        let page_id: lopdf::ObjectId = (2, 0);
        let pages_id: lopdf::ObjectId = (3, 0);
        let catalog_id: lopdf::ObjectId = (4, 0);

        let mut doc = Document::new();

        let content_stream = Stream::new(Dictionary::new(), content.to_vec());
        doc.objects.insert(content_id, Object::Stream(content_stream));

        let mut page_dict = Dictionary::new();
        page_dict.set("Type", name(b"Page"));
        page_dict.set("Contents", Object::Reference(content_id));
        page_dict.set("Parent", Object::Reference(pages_id));
        doc.objects.insert(page_id, Object::Dictionary(page_dict));

        let mut pages_dict = Dictionary::new();
        pages_dict.set("Type", name(b"Pages"));
        pages_dict.set("Count", Object::Integer(1));
        pages_dict.set("Kids", Object::Array(vec![Object::Reference(page_id)]));
        doc.objects.insert(pages_id, Object::Dictionary(pages_dict));

        let mut catalog = Dictionary::new();
        catalog.set("Type", name(b"Catalog"));
        catalog.set("Pages", Object::Reference(pages_id));
        doc.objects.insert(catalog_id, Object::Dictionary(catalog));

        doc.trailer.set("Root", Object::Reference(catalog_id));
        doc
    }

    /// Inserts a page dictionary at `page_id` with the given parent, a
    /// `[0 0 612 792]` MediaBox and an "Annots" array referencing `annot_ids`.
    pub fn make_page_with_annots(doc: &mut Document, page_id: lopdf::ObjectId, parent_id: lopdf::ObjectId, annot_ids: Vec<lopdf::ObjectId>) {
        let annot_refs: Vec<Object> = annot_ids.into_iter().map(Object::Reference).collect();

        let mut page = Dictionary::new();
        page.set("Type", name(b"Page"));
        page.set("Parent", Object::Reference(parent_id));
        page.set("MediaBox", letter_media_box());
        page.set("Annots", Object::Array(annot_refs));
        doc.objects.insert(page_id, Object::Dictionary(page));
    }
}