binocular/preview/pdf/
mod.rs1use crate::preview::doc::PreviewDoc;
4use ratatui::style::Color;
5use ratatui::text::Text;
6use std::path::Path;
7
8mod extract;
9
/// Upper bound on how many pages `generate_preview` renders text for; any
/// pages beyond this are only counted in a trailing "more pages" note.
const MAX_PREVIEW_PAGES: u32 = 10;

/// Character limit handed to `crate::text::truncate_str_chars` for each
/// displayed line of extracted page text.
const MAX_LINE_DISPLAY_CHARS: usize = 500;
13
/// Reports whether `path` has a `pdf` file extension, compared ASCII
/// case-insensitively.
///
/// Only the path's extension is inspected; the file is never opened. Paths
/// with no extension, or with an extension that is not valid UTF-8, yield
/// `false`.
pub fn is_pdf(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some(ext) if ext.eq_ignore_ascii_case("pdf"))
}
20
21pub fn generate_preview(path: &Path) -> Text<'static> {
22 let mut doc = PreviewDoc::new();
23
24 if let Ok(meta) = std::fs::metadata(path) {
25 use crate::preview::doc::format_file_size;
26 doc.push_section("File Info");
27 doc.push_field("Size", format_file_size(meta.len()), Color::White);
28 doc.push_blank_line();
29 }
30
31 let doc_result = lopdf::Document::load(path);
32 let pdf = match doc_result {
33 Ok(d) => d,
34 Err(e) => {
35 doc.push_section("Error");
36 doc.push_field("Message", e.to_string(), Color::Red);
37 return doc.into_text();
38 }
39 };
40
41 let page_count = pdf.get_pages().len() as u32;
42 doc.push_section("Document Info");
43 doc.push_field("Pages", page_count.to_string(), Color::White);
44
45 if let Ok(info) = pdf
46 .trailer
47 .get(b"Info")
48 .and_then(|o| pdf.get_object(o.as_reference()?))
49 {
50 if let Ok(dict) = info.as_dict() {
51 for (key, label) in &[
52 (b"Title" as &[u8], "Title"),
53 (b"Author", "Author"),
54 (b"Subject", "Subject"),
55 (b"Creator", "Creator"),
56 (b"Producer", "Producer"),
57 (b"CreationDate", "Created"),
58 ] {
59 if let Ok(val) = dict.get(key) {
60 if let Ok(s) = val.as_str() {
61 let decoded = extract::decode_pdf_string(s);
62 if !decoded.is_empty() {
63 doc.push_field(label, decoded, Color::White);
64 }
65 }
66 }
67 }
68 }
69 }
70
71 doc.push_blank_line();
72
73 let pages_to_show = page_count.min(MAX_PREVIEW_PAGES);
74 doc.push_section(Box::leak(
75 format!("Content (first {} of {} pages)", pages_to_show, page_count).into_boxed_str(),
76 ));
77
78 let page_ids: Vec<(u32, lopdf::ObjectId)> = pdf.get_pages().into_iter().collect();
79 let mut page_ids_sorted: Vec<(u32, lopdf::ObjectId)> = page_ids;
80 page_ids_sorted.sort_by_key(|(n, _)| *n);
81
82 for (page_num, page_id) in page_ids_sorted.iter().take(pages_to_show as usize) {
83 let page_label = Box::leak(format!("Page {}", page_num).into_boxed_str()) as &'static str;
84 doc.push_section(page_label);
85
86 match extract::extract_page_text(&pdf, *page_id) {
87 Ok(text) if text.trim().is_empty() => {
88 doc.push_muted_italic(" (no extractable text — possibly image-based)");
89 }
90 Ok(text) => {
91 for line in text.lines().filter(|l| !l.trim().is_empty()) {
92 let (display, _) =
93 crate::text::truncate_str_chars(line.trim(), MAX_LINE_DISPLAY_CHARS);
94 doc.push_field(" ", display.to_string(), Color::White);
95 }
96 }
97 Err(_) => {
98 doc.push_muted_italic(" (could not extract text from this page)");
99 }
100 }
101 doc.push_blank_line();
102 }
103
104 if page_count > MAX_PREVIEW_PAGES {
105 doc.push_muted_italic(format!(
106 " … {} more pages not shown",
107 page_count - MAX_PREVIEW_PAGES
108 ));
109 }
110
111 doc.into_text()
112}
113
114pub fn extract_all_text(path: &Path) -> lopdf::Result<Vec<String>> {
115 extract::extract_all_text(path)
116}