Skip to main content

debug_text_objects

Function debug_text_objects 

Source
pub fn debug_text_objects(bytes: &[u8], index: i32) -> Vec<DebugTextObject>
Expand description

Diagnostic helper: returns every text object on page `index`, each tagged as visible or invisible. Visibility is read through the object-level `FPDFTextObj_GetTextRenderMode`, which — unlike the per-character render-mode API — is available on the default pdfium binding. A hidden duplicate text layer shows up as invisible objects that repeat the visible text. Used by the `dump_render_modes` example.

Examples found in repository?
examples/dump_render_modes.rs (line 23)
14fn main() { // CLI: dump every text object on the first page that has one containing <needle>.
15    let path = std::env::args() // First CLI argument: path to the PDF file.
16        .nth(1)
17        .expect("usage: dump_render_modes <pdf> <needle>"); // Panics with the usage line if the path is missing.
18    let needle = std::env::args().nth(2).expect("needle substring required"); // Second CLI argument: substring to look for.
19    let bytes = std::fs::read(&path).expect("read pdf"); // Whole file is read into memory; panics on I/O failure.
20    let pages = page_count(&bytes, None).expect("page count"); // NOTE(review): meaning of the `None` argument is not shown here — confirm against page_count.
21
22    for p in 0..pages as i32 { // Cast because debug_text_objects takes the page index as i32.
23        let objs = debug_text_objects(&bytes, p);
24        if !objs.iter().any(|o| o.text.contains(&needle)) { // Skip pages where no text object contains the needle.
25            continue;
26        }
27        let visible = objs.iter().filter(|o| !o.invisible).count(); // Tally objects by their `invisible` flag for the summary line.
28        let invisible = objs.iter().filter(|o| o.invisible).count();
29        println!(
30            "page {p}: {} text object(s) — {visible} visible, {invisible} INVISIBLE\n",
31            objs.len()
32        );
33        for o in &objs { // One output line per text object, including those that do not contain the needle.
34            let tag = if o.invisible {
35                "INVISIBLE"
36            } else {
37                "visible  " // Padded to the same width as "INVISIBLE" so the columns line up.
38            };
39            let text: String = o.text.chars().take(70).collect(); // Show at most the first 70 characters of the text.
40            println!(
41                "  [{tag}] l={:8.2} b={:8.2} r={:8.2} t={:8.2}  {text:?}", // presumably left/bottom/right/top bounds — confirm against DebugTextObject.
42                o.l, o.b, o.r, o.t
43            );
44        }
45        return; // Only the first matching page is reported.
46    }
47    println!("needle {needle:?} not found in any text object across {pages} page(s)"); // Reached only when no page matched.
48}