Skip to main content

binocular/preview/pdf/
mod.rs

1//! PDF preview: extracts and displays text content from PDF files.
2
3use crate::preview::doc::PreviewDoc;
4use ratatui::style::Color;
5use ratatui::text::Text;
6use std::path::Path;
7
8mod extract;
9
/// Upper bound on how many pages `generate_preview` renders text for; when a
/// document has more pages, the remainder is only summarised in a trailing
/// "more pages not shown" note.
const MAX_PREVIEW_PAGES: u32 = 10;

/// Limit handed to `crate::text::truncate_str_chars` for every extracted line
/// before that line is added to the preview.
const MAX_LINE_DISPLAY_CHARS: usize = 500;
13
/// Reports whether `path` names a PDF file, judged purely by its extension.
///
/// The comparison against `"pdf"` ignores ASCII case. Paths that have no
/// extension, or whose extension is not valid UTF-8, yield `false`. The file
/// itself is never opened.
pub fn is_pdf(path: &Path) -> bool {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext.eq_ignore_ascii_case("pdf"),
        None => false,
    }
}
20
21pub fn generate_preview(path: &Path) -> Text<'static> {
22    let mut doc = PreviewDoc::new();
23
24    if let Ok(meta) = std::fs::metadata(path) {
25        use crate::preview::doc::format_file_size;
26        doc.push_section("File Info");
27        doc.push_field("Size", format_file_size(meta.len()), Color::White);
28        doc.push_blank_line();
29    }
30
31    let doc_result = lopdf::Document::load(path);
32    let pdf = match doc_result {
33        Ok(d) => d,
34        Err(e) => {
35            doc.push_section("Error");
36            doc.push_field("Message", e.to_string(), Color::Red);
37            return doc.into_text();
38        }
39    };
40
41    let page_count = pdf.get_pages().len() as u32;
42    doc.push_section("Document Info");
43    doc.push_field("Pages", page_count.to_string(), Color::White);
44
45    if let Ok(info) = pdf
46        .trailer
47        .get(b"Info")
48        .and_then(|o| pdf.get_object(o.as_reference()?))
49    {
50        if let Ok(dict) = info.as_dict() {
51            for (key, label) in &[
52                (b"Title" as &[u8], "Title"),
53                (b"Author", "Author"),
54                (b"Subject", "Subject"),
55                (b"Creator", "Creator"),
56                (b"Producer", "Producer"),
57                (b"CreationDate", "Created"),
58            ] {
59                if let Ok(val) = dict.get(key) {
60                    if let Ok(s) = val.as_str() {
61                        let decoded = extract::decode_pdf_string(s);
62                        if !decoded.is_empty() {
63                            doc.push_field(label, decoded, Color::White);
64                        }
65                    }
66                }
67            }
68        }
69    }
70
71    doc.push_blank_line();
72
73    let pages_to_show = page_count.min(MAX_PREVIEW_PAGES);
74    doc.push_section(Box::leak(
75        format!("Content (first {} of {} pages)", pages_to_show, page_count).into_boxed_str(),
76    ));
77
78    let page_ids: Vec<(u32, lopdf::ObjectId)> = pdf.get_pages().into_iter().collect();
79    let mut page_ids_sorted: Vec<(u32, lopdf::ObjectId)> = page_ids;
80    page_ids_sorted.sort_by_key(|(n, _)| *n);
81
82    for (page_num, page_id) in page_ids_sorted.iter().take(pages_to_show as usize) {
83        let page_label = Box::leak(format!("Page {}", page_num).into_boxed_str()) as &'static str;
84        doc.push_section(page_label);
85
86        match extract::extract_page_text(&pdf, *page_id) {
87            Ok(text) if text.trim().is_empty() => {
88                doc.push_muted_italic("   (no extractable text — possibly image-based)");
89            }
90            Ok(text) => {
91                for line in text.lines().filter(|l| !l.trim().is_empty()) {
92                    let (display, _) =
93                        crate::text::truncate_str_chars(line.trim(), MAX_LINE_DISPLAY_CHARS);
94                    doc.push_field("  ", display.to_string(), Color::White);
95                }
96            }
97            Err(_) => {
98                doc.push_muted_italic("   (could not extract text from this page)");
99            }
100        }
101        doc.push_blank_line();
102    }
103
104    if page_count > MAX_PREVIEW_PAGES {
105        doc.push_muted_italic(format!(
106            "   … {} more pages not shown",
107            page_count - MAX_PREVIEW_PAGES
108        ));
109    }
110
111    doc.into_text()
112}
113
114pub fn extract_all_text(path: &Path) -> lopdf::Result<Vec<String>> {
115    extract::extract_all_text(path)
116}