gbscraper/writer/
pdf.rs

1use lopdf::content::{Content, Operation};
2use lopdf::{dictionary, Bookmark};
3use lopdf::{Document, Object, Stream};
4use std::collections::HashMap;
5use std::{fs, io};
6
/// Table of contents for mapping image files to page names.
///
/// Entries are keyed by image filename; adding a second entry under a filename
/// that is already present replaces the earlier entry.
#[derive(Debug, Clone, Default)]
pub struct TableOfContents {
    /// Outline entries keyed by the filename of the image they describe.
    lookup: HashMap<String, TocEntry>,
}

/// A single outline entry: the title of a page plus its display attributes.
#[derive(Debug, Clone, PartialEq)]
struct TocEntry {
    /// Title of the page as it will appear in the document outline.
    pub page_title: String,
    /// 0, 1 for italic, 2 for bold, 3 for italic bold
    pub format: u32,
    /// R,G,B
    pub color: [f32; 3],
    // TODO: descendants???
}

impl TocEntry {
    /// Bundles a title, a format code and an R,G,B color into an entry.
    fn new(page_title: String, format: u32, color: [f32; 3]) -> TocEntry {
        TocEntry {
            page_title,
            format,
            color,
        }
    }
}

impl TableOfContents {
    /// Creates an empty table of contents.
    pub fn new() -> TableOfContents {
        Self::default()
    }

    /// Adds entry to table of contents using format `0` and color `[0, 0, 0]`.
    ///
    /// # Arguments
    ///
    /// * `page_title` - Title of page as it will appear in document outline.
    /// * `page_filename` - Filename of image to link to.
    pub fn add_page(&mut self, page_title: &str, page_filename: &str) {
        self.add_page_internal(
            page_filename,
            TocEntry::new(page_title.to_string(), 0, [0., 0., 0.]),
        );
    }

    /// Adds entry to table of contents with an explicit format and color.
    ///
    /// # Arguments
    ///
    /// * `page_title` - Title of page as it will appear in document outline.
    /// * `page_filename` - Filename of image to link to.
    /// * `format` - 0, 1 for italic, 2 for bold, 3 for italic bold.
    /// * `color` - R,G,B
    pub fn add_page_extra(
        &mut self,
        page_title: &str,
        page_filename: &str,
        format: u32,
        color: [f32; 3],
    ) {
        self.add_page_internal(
            page_filename,
            TocEntry::new(page_title.to_string(), format, color),
        );
    }

    /// Stores `entry` under `page_filename`, replacing any entry already
    /// registered for that filename.
    fn add_page_internal(&mut self, page_filename: &str, entry: TocEntry) {
        self.lookup.insert(page_filename.to_string(), entry);
    }

    /// Looks up the entry registered for `page_filename`, if any.
    ///
    /// Takes `&str` so callers do not need an owned `String` to query; a
    /// `&String` argument still works through deref coercion.
    fn get_page_info(&self, page_filename: &str) -> Option<&TocEntry> {
        self.lookup.get(page_filename)
    }
}
80
81/// Creates a PDF from images in a specified directory.
82///
83/// # Arguments
84///
85/// * `image_dir` - Directory where images to be converted into pages of PDF exist.
86/// * `target_filename` - Path to save PDF to, including filename and extension.
87pub fn create_pdf(image_dir: &str, target_filename: &str) -> io::Result<()> {
88    create_pdf_internal(image_dir, target_filename, None)
89}
90
91/// Creates a PDF from images in a specified directory.
92///
93/// # Arguments
94///
95/// * `image_dir` - Directory where images to be converted into pages of PDF exist.
96/// * `target_filename` - Path to save PDF to, including filename and extension.
97/// * `toc` - Table fo contents mapping image files to page titles.
98pub fn create_pdf_with_toc(
99    image_dir: &str,
100    target_filename: &str,
101    toc: &TableOfContents,
102) -> io::Result<()> {
103    create_pdf_internal(image_dir, target_filename, Some(toc))
104}
105
106fn create_pdf_internal(
107    image_dir: &str,
108    target_filename: &str,
109    toc: Option<&TableOfContents>,
110) -> io::Result<()> {
111    // Initialize document
112    let mut doc = Document::with_version("1.5");
113    let pages_id = doc.new_object_id();
114    let font_id = doc.add_object(dictionary! {
115        "Type" => "Font",
116        "Subtype" => "Type1",
117        "BaseFont" => "Courier",
118    });
119    let resources_id = doc.add_object(dictionary! {
120        "Font" => dictionary! {
121            "F1" => font_id,
122        },
123    });
124
125    // Add page for each image
126    let mut pages = vec![];
127    let paths = fs::read_dir(image_dir)?;
128    for path in paths {
129        if let Ok(p) = path {
130            let name = p.file_name().into_string().unwrap();
131
132            if let Ok(stream) = lopdf::xobject::image(p.path().as_os_str().to_str().unwrap()) {
133                let content = Content {
134                    operations: Vec::<Operation>::new(),
135                };
136                let content_id =
137                    doc.add_object(Stream::new(dictionary! {}, content.encode().unwrap()));
138
139                let mut width: i64 = 800;
140                let mut height: i64 = 1100;
141                if let Object::Integer(a) = stream.dict.get("Width".as_bytes()).unwrap() {
142                    width = *a;
143                }
144                if let Object::Integer(a) = stream.dict.get("Height".as_bytes()).unwrap() {
145                    height = *a;
146                }
147
148                let image_filename = doc.add_object(dictionary! {
149                    "Type" => "Page",
150                    "Parent" => pages_id,
151                    "Contents" => content_id,
152                    "MediaBox" => vec![0.into(), 0.into(), width.into(), height.into()],
153                });
154
155                let result = doc.insert_image(
156                    image_filename,
157                    stream,
158                    (0., 0.),
159                    (width as f32, height as f32),
160                );
161                if result.is_err() {
162                    println!("error!: {name}")
163                }
164
165                pages.push(image_filename.into());
166
167                // Check for TOC entry for this page
168                if let Some(t) = toc {
169                    if let Some(value) = t.get_page_info(&name) {
170                        let b = Bookmark::new(
171                            value.page_title.clone(),
172                            value.color,
173                            value.format,
174                            image_filename,
175                        );
176                        doc.add_bookmark(b, None);
177                    }
178                }
179
180                //TODO: links in page
181                //Note: may need to download image without setting "w=3000" first in order to scale coordinates
182            }
183        }
184    }
185
186    // Finalize and save document
187    let len = pages.len() as u32;
188    doc.objects.insert(
189        pages_id,
190        Object::Dictionary(dictionary! {
191            "Type" => "Pages",
192            "Kids" => pages,
193            "Count" => len,
194            "Resources" => resources_id,
195        }),
196    );
197    let outline_id = doc.build_outline();
198    if let Some(ol) = outline_id {
199        let catalog_id = doc.add_object(dictionary! {
200            "Type" => "Catalog",
201            "Pages" => pages_id,
202            "Outlines" => ol,
203        });
204        doc.trailer.set("Root", catalog_id);
205    } else {
206        let catalog_id = doc.add_object(dictionary! {
207            "Type" => "Catalog",
208            "Pages" => pages_id,
209        });
210        doc.trailer.set("Root", catalog_id);
211    }
212
213    doc.compress();
214    doc.save(target_filename)?;
215    Ok(())
216}