Skip to main content

dump_render_modes/
dump_render_modes.rs

1//! Diagnose a duplicated / hidden PDF text layer.
2//!
3//! Finds the first page whose text objects contain `needle` and lists that page's
4//! text objects, each tagged visible or INVISIBLE (text render mode 3), with their
5//! bounding box and text. A hidden duplicate layer — e.g. the plain-text copy
6//! web exporters stash behind a syntax-highlighted code block — shows up as
7//! INVISIBLE objects repeating the visible text, usually at a different position.
8//!
9//! Usage:
10//!   cargo run -p fleischwolf-pdf --example dump_render_modes -- file.pdf "LangVersion"
11
12use fleischwolf_pdf::pdfium_backend::{debug_text_objects, page_count};
13
14fn main() {
15    let path = std::env::args()
16        .nth(1)
17        .expect("usage: dump_render_modes <pdf> <needle>");
18    let needle = std::env::args().nth(2).expect("needle substring required");
19    let bytes = std::fs::read(&path).expect("read pdf");
20    let pages = page_count(&bytes, None).expect("page count");
21
22    for p in 0..pages as i32 {
23        let objs = debug_text_objects(&bytes, p);
24        if !objs.iter().any(|o| o.text.contains(&needle)) {
25            continue;
26        }
27        let visible = objs.iter().filter(|o| !o.invisible).count();
28        let invisible = objs.iter().filter(|o| o.invisible).count();
29        println!(
30            "page {p}: {} text object(s) — {visible} visible, {invisible} INVISIBLE\n",
31            objs.len()
32        );
33        for o in &objs {
34            let tag = if o.invisible {
35                "INVISIBLE"
36            } else {
37                "visible  "
38            };
39            let text: String = o.text.chars().take(70).collect();
40            println!(
41                "  [{tag}] l={:8.2} b={:8.2} r={:8.2} t={:8.2}  {text:?}",
42                o.l, o.b, o.r, o.t
43            );
44        }
45        return;
46    }
47    println!("needle {needle:?} not found in any text object across {pages} page(s)");
48}