Skip to main content

gbscraper/writer/
pdf.rs

1use lopdf::content::{Content, Operation};
2use lopdf::{dictionary, Bookmark};
3use lopdf::{Document, Object, Stream};
4use std::collections::HashMap;
5use std::{fs, io};
6
7/// Table of contents for mapping image files to page names.
8pub struct TableOfContents {
9    lookup: HashMap<String, TocEntry>,
10}
11
/// A single outline entry: the page title plus the styling given to its bookmark.
struct TocEntry {
    /// Title of the page as it appears in the document outline.
    pub page_title: String,
    /// Text style of the outline item: 0, 1 for italic, 2 for bold, 3 for italic bold.
    pub format: u32,
    /// Color of the outline item as `[R, G, B]`.
    pub color: [f32; 3],
    // TODO: support descendant (nested) entries?
}
20
21impl TocEntry {
22    fn new(page_title: String, format: u32, color: [f32; 3]) -> TocEntry {
23        TocEntry {
24            page_title,
25            format,
26            color,
27        }
28    }
29}
30
31impl Default for TableOfContents {
32    fn default() -> Self {
33        Self::new()
34    }
35}
36
37impl TableOfContents {
38    pub fn new() -> TableOfContents {
39        TableOfContents {
40            lookup: HashMap::<String, TocEntry>::new(),
41        }
42    }
43
44    /// Adds entry to table of contents.
45    ///
46    /// # Arguments
47    ///
48    /// * `page_title` - Title of page as it will appear in document outline.
49    /// * `page_filename` - Filename of image to link to.
50    pub fn add_page(&mut self, page_title: &str, page_filename: &str) {
51        self.add_page_internal(
52            page_filename,
53            TocEntry::new(page_title.to_string(), 0, [0., 0., 0.]),
54        );
55    }
56
57    /// Adds entry to table of contents.
58    ///
59    /// # Arguments
60    ///
61    /// * `page_title` - Title of page as it will appear in document outline.
62    /// * `page_filename` - Filename of image to link to.
63    /// * `format` - 0, 1 for italic, 2 for bold, 3 for italic bold.
64    /// * `color` - R,G,B
65    pub fn add_page_extra(
66        &mut self,
67        page_title: &str,
68        page_filename: &str,
69        format: u32,
70        color: [f32; 3],
71    ) {
72        self.add_page_internal(
73            page_filename,
74            TocEntry::new(page_title.to_string(), format, color),
75        );
76    }
77
78    fn add_page_internal(&mut self, page_filename: &str, entry: TocEntry) {
79        self.lookup.insert(page_filename.to_string(), entry);
80    }
81
82    fn get_page_info(&self, page_filename: &String) -> Option<&TocEntry> {
83        self.lookup.get(page_filename)
84    }
85}
86
87/// Creates a PDF from images in a specified directory.
88///
89/// # Arguments
90///
91/// * `image_dir` - Directory where images to be converted into pages of PDF exist.
92/// * `target_filename` - Path to save PDF to, including filename and extension.
93pub fn create_pdf(image_dir: &str, target_filename: &str) -> io::Result<()> {
94    create_pdf_internal(image_dir, target_filename, None)
95}
96
97/// Creates a PDF from images in a specified directory.
98///
99/// # Arguments
100///
101/// * `image_dir` - Directory where images to be converted into pages of PDF exist.
102/// * `target_filename` - Path to save PDF to, including filename and extension.
103/// * `toc` - Table fo contents mapping image files to page titles.
104pub fn create_pdf_with_toc(
105    image_dir: &str,
106    target_filename: &str,
107    toc: &TableOfContents,
108) -> io::Result<()> {
109    create_pdf_internal(image_dir, target_filename, Some(toc))
110}
111
112fn create_pdf_internal(
113    image_dir: &str,
114    target_filename: &str,
115    toc: Option<&TableOfContents>,
116) -> io::Result<()> {
117    // Initialize document
118    let mut doc = Document::with_version("1.5");
119    let pages_id = doc.new_object_id();
120    let font_id = doc.add_object(dictionary! {
121        "Type" => "Font",
122        "Subtype" => "Type1",
123        "BaseFont" => "Courier",
124    });
125    let resources_id = doc.add_object(dictionary! {
126        "Font" => dictionary! {
127            "F1" => font_id,
128        },
129    });
130
131    // Add page for each image
132    let mut pages = vec![];
133    let mut entries: Vec<_> = fs::read_dir(image_dir)?
134        .collect::<io::Result<_>>()?;
135    entries.sort_by_key(|e| e.file_name());
136    for p in entries {
137        let name = p.file_name().into_string().map_err(|file_name| {
138            io::Error::new(
139                io::ErrorKind::InvalidData,
140                format!("image filename is not valid UTF-8: {:?}", file_name),
141            )
142        })?;
143
144        let image_path = p.path();
145        let image_path_str = image_path.to_str().ok_or_else(|| {
146            io::Error::new(
147                io::ErrorKind::InvalidData,
148                format!("image path is not valid UTF-8: {:?}", image_path),
149            )
150        })?;
151
152        let stream = lopdf::xobject::image(image_path_str).map_err(|e| {
153            io::Error::new(
154                io::ErrorKind::InvalidData,
155                format!("failed to load image '{}': {e}", name),
156            )
157        })?;
158        let content = Content {
159            operations: Vec::<Operation>::new(),
160        };
161        let encoded_content = content.encode().map_err(|e| {
162            io::Error::new(
163                io::ErrorKind::InvalidData,
164                format!("failed to encode PDF content stream for {}: {e}", name),
165            )
166        })?;
167        let content_id = doc.add_object(Stream::new(dictionary! {}, encoded_content));
168
169        let mut width: i64 = 800;
170        let mut height: i64 = 1100;
171        if let Ok(Object::Integer(a)) = stream.dict.get("Width".as_bytes()) {
172            width = *a;
173        }
174        if let Ok(Object::Integer(a)) = stream.dict.get("Height".as_bytes()) {
175            height = *a;
176        }
177
178        let image_filename = doc.add_object(dictionary! {
179            "Type" => "Page",
180            "Parent" => pages_id,
181            "Contents" => content_id,
182            "MediaBox" => vec![0.into(), 0.into(), width.into(), height.into()],
183        });
184
185        doc.insert_image(
186            image_filename,
187            stream,
188            (0., 0.),
189            (width as f32, height as f32),
190        )
191        .map_err(|err| {
192            io::Error::other(format!("failed to insert image '{name}' into PDF: {err}"))
193        })?;
194
195        pages.push(image_filename.into());
196
197        // Check for TOC entry for this page
198        if let Some(t) = toc {
199            if let Some(value) = t.get_page_info(&name) {
200                let b = Bookmark::new(
201                    value.page_title.clone(),
202                    value.color,
203                    value.format,
204                    image_filename,
205                );
206                doc.add_bookmark(b, None);
207            }
208        }
209
210        //TODO: links in page
211        //Note: may need to download image without setting "w=3000" first in order to scale coordinates
212    }
213
214    // Finalize and save document
215    let len = pages.len() as u32;
216    doc.objects.insert(
217        pages_id,
218        Object::Dictionary(dictionary! {
219            "Type" => "Pages",
220            "Kids" => pages,
221            "Count" => len,
222            "Resources" => resources_id,
223        }),
224    );
225    let outline_id = doc.build_outline();
226    if let Some(ol) = outline_id {
227        let catalog_id = doc.add_object(dictionary! {
228            "Type" => "Catalog",
229            "Pages" => pages_id,
230            "Outlines" => ol,
231        });
232        doc.trailer.set("Root", catalog_id);
233    } else {
234        let catalog_id = doc.add_object(dictionary! {
235            "Type" => "Catalog",
236            "Pages" => pages_id,
237        });
238        doc.trailer.set("Root", catalog_id);
239    }
240
241    doc.compress();
242    doc.save(target_filename)?;
243    Ok(())
244}