c2pdf/
code_to_pdf.rs

1//! Contains [`HighlighterConfig`] and [`CodeToPdf`] structs
2
3use std::{
4  cmp::Ordering,
5  collections::BTreeMap,
6  ffi::OsStr,
7  fs,
8  io::BufRead,
9  mem,
10  path::Path,
11  sync::{Arc, Mutex},
12};
13
14use ignore::Walk;
15use printpdf::{
16  FontId, Op, PdfDocument, PdfPage, Pt, Px, RawImage, TextItem, XObject, XObjectId,
17  XObjectRotation, XObjectTransform, color,
18};
19use syntect::{
20  easy::HighlightFile,
21  highlighting::{Color, Style, Theme},
22  parsing::SyntaxSet,
23};
24
25use crate::{dimensions::Dimensions, helpers::init_page, text_manipulation::TextWrapper};
26
27/// Configuration struct for the highlighter ([`syntect`])
28///
29/// Contains the desired theme, syntax set, and the maximum line length to highlight
30pub struct HighlighterConfig {
31  syntax_set: SyntaxSet,
32  theme: Theme,
33  max_line_len_to_highlight: usize,
34}
35impl HighlighterConfig {
36  /// Initialises new [`HighlighterConfig`]
37  pub fn new(syntax_set: SyntaxSet, theme: Theme) -> Self {
38    Self {
39      syntax_set,
40      theme,
41      max_line_len_to_highlight: 20_000,
42    }
43  }
44}
45/// Subset of `PdfDocument`. Created as some types within `PdfDocument` weren't sync so it couldn't be used with `rayon`
46#[derive(Default)]
47pub struct DocumentSubset {
48  x_object_map: BTreeMap<XObjectId, XObject>,
49  // font_map: Arc<Mutex<BTreeMap<FontId, ParsedFont>>>,
50  pages: Vec<(PdfPage, usize)>,
51}
52impl DocumentSubset {
53  /// Add an image
54  pub fn add_image(&mut self, image: &RawImage) -> XObjectId {
55    let id = XObjectId::new();
56    self
57      .x_object_map
58      .insert(id.clone(), XObject::Image(image.clone()));
59    id
60  }
61  /// Append everything from the `DocumentSubset` into the actual PdfDocument
62  pub fn to_document(&mut self, doc: &mut PdfDocument) {
63    let x_obj_map = mem::take(&mut self.x_object_map);
64    doc.resources.xobjects.map = x_obj_map;
65    let mut pages = mem::take(&mut self.pages);
66    // Sort into order from the walker
67    // This brings back determinism :)
68    pages.sort_by(|a, b| {
69      let ia = a.1;
70      let ib = b.1;
71      if ia > ib {
72        Ordering::Greater
73      } else if ia < ib {
74        Ordering::Less
75      } else {
76        Ordering::Equal
77      }
78    });
79    doc.pages = pages.into_iter().map(|f| f.0).collect();
80  }
81  // pub fn add_font(&mut self, font: &ParsedFont) -> FontId {
82  //     let id = FontId::new();
83  //     self.font_map.lock().unwrap().insert(id.clone(), font.clone());
84  //     id
85  // }
86}
87fn to_rgb(c: Color) -> color::Rgb {
88  color::Rgb {
89    r: (c.r as f32) / 255.0,
90    g: (c.g as f32) / 255.0,
91    b: (c.b as f32) / 255.0,
92    icc_profile: None,
93  }
94}
95/// Main struct for generating PDFs.
96/// It handles almost the entire process of reading and highlighting code,
97/// as well as actually writing it to the PDF
98pub struct CodeToPdf {
99  current_page_contents: Vec<Op>,
100  doc: Arc<Mutex<DocumentSubset>>,
101  font_id: FontId,
102  page_dimensions: Dimensions,
103  text_wrapper: TextWrapper,
104  processed_file_count: usize,
105  // Text to put at the top of every page
106  page_text: Option<String>,
107}
108impl CodeToPdf {
109  /// Initialises a new [`CodeToPdf`]
110  pub fn new(
111    doc: Arc<Mutex<DocumentSubset>>,
112    font_id: FontId,
113    page_dimensions: Dimensions,
114    text_wrapper: TextWrapper,
115    page_text: Option<String>,
116  ) -> Self {
117    Self {
118      current_page_contents: vec![],
119      doc,
120      font_id,
121      page_dimensions,
122      text_wrapper,
123      processed_file_count: 0,
124      page_text,
125    }
126  }
127  /// Saves the current page contents to the document, and clears [`CodeToPdf::current_page_contents`]
128  fn save_page(&mut self, index: usize) {
129    let contents = std::mem::take(&mut self.current_page_contents);
130    let page = PdfPage::new(
131      self.page_dimensions.width,
132      self.page_dimensions.height,
133      contents,
134    );
135    _ = self.doc.lock().map(|mut doc| {
136      doc.pages.push((page, index));
137    });
138    // self.doc.pages.push(page);
139  }
140
141  /// Initialises [`CodeToPdf::current_page_contents`] with basic contents
142  fn init_page(&mut self, path: &Path) {
143    // Should never be called on a non-empty current_pages_contents, so check it in debug mode
144    debug_assert_eq!(self.current_page_contents.len(), 0);
145
146    init_page(
147      &mut self.current_page_contents,
148      &self.page_dimensions,
149      self.font_id.clone(),
150      self.text_wrapper.font_size(),
151      path,
152      self.page_text.as_deref(),
153      &mut self.text_wrapper,
154    );
155  }
156  /// Computes maximum number of lines that can be displayed on a page
157  fn max_line_count(&self) -> u32 {
158    let max_height = self.page_dimensions.max_text_height();
159    ((max_height).into_pt().0 / (self.text_wrapper.font_size() * 1.2)).floor() as u32
160  }
161  /// Increment given line_count. Begin a new page if it's too high
162  /// Returns `true` if a new page is created
163  fn increment_line_count(
164    &mut self,
165    line_count: &mut u32,
166    path: &Path,
167    index: usize,
168    has_added_text: &mut bool,
169  ) -> bool {
170    *line_count += 1;
171    if *line_count > self.max_line_count() {
172      self.save_page(index);
173      self.init_page(path);
174      *has_added_text = false;
175      *line_count = 0;
176      true
177    } else {
178      false
179    }
180  }
181  /// Generates all the pages for a file
182  fn generate_highlighted_pages(
183    &mut self,
184    highlighter: &mut HighlightFile,
185    path: &Path,
186    highlighter_config: &HighlighterConfig,
187    index: usize,
188  ) {
189    let mut line = String::new();
190    let mut line_count = 0;
191    self.init_page(path);
192    let mut has_added_text = false;
193    let mut prev_colour = Color::BLACK;
194    while highlighter.reader.read_line(&mut line).unwrap_or(0) > 0 {
195      has_added_text = true;
196      // Store the char count for the current line
197      let mut line_width = 0.0;
198      let regions: &[(Style, &str)] = if line.len() < highlighter_config.max_line_len_to_highlight {
199        &highlighter
200          .highlight_lines
201          .highlight_line(&line, &highlighter_config.syntax_set)
202          .unwrap()
203      } else {
204        &[(
205          Style {
206            foreground: Color::BLACK,
207            background: Color::WHITE,
208            font_style: syntect::highlighting::FontStyle::default(),
209          },
210          &line,
211        )]
212      };
213      for (style, text) in regions {
214        let text_width = self.text_wrapper.get_width(text).0;
215
216        let line_width_remaining = self.page_dimensions.max_text_width().into_pt().0 - line_width;
217
218        let text_colour = style.foreground;
219        // Set PDF text colour if the colour of the current region is different to the colour of the previous region
220        if text_colour != prev_colour {
221          self.current_page_contents.push(Op::SetFillColor {
222            col: color::Color::Rgb(to_rgb(text_colour)),
223          });
224          prev_colour = text_colour;
225        }
226        // Split into lines, with the length of the first line being the length remaining on the current line
227        let lines = self.text_wrapper.split_into_lines(text, |i| match i {
228          0 => Pt(line_width_remaining),
229          _ => self.page_dimensions.max_text_width().into_pt(),
230        });
231        match lines.len() {
232          // If only a single line, then no new lines are going to be made (as we're processing a region here)
233          1 => {
234            self.current_page_contents.push(Op::WriteText {
235              items: vec![TextItem::Text(
236                text
237                  .trim_end_matches(|x| (x == '\n') | (x == '\r'))
238                  .to_string(),
239              )],
240              font: self.font_id.clone(),
241            });
242            line_width += text_width;
243          }
244          // If the region is too long to fit onto the current line, write to multiple different lines
245          _ => {
246            let mut first = true;
247            for (l, width) in lines {
248              if !first {
249                self.current_page_contents.push(Op::AddLineBreak);
250                line_width = 0.0;
251              }
252              first = false;
253              line_width += width;
254              self.current_page_contents.push(Op::WriteText {
255                items: vec![TextItem::Text(l)],
256                font: self.font_id.clone(),
257              });
258              self.increment_line_count(&mut line_count, path, index, &mut has_added_text);
259            }
260          }
261        }
262      }
263
264      if !self.increment_line_count(&mut line_count, path, index, &mut has_added_text) {
265        self.current_page_contents.push(Op::AddLineBreak);
266      }
267      line.clear();
268    }
269    // Clear page if no text has been added to it
270    if has_added_text {
271      self.save_page(index);
272    } else {
273      self.current_page_contents.clear()
274    }
275  }
276
277  /// Generates a page containing the image at the path given
278  fn generate_image_page(&mut self, path: &Path, index: usize) {
279    let bytes = if let Ok(b) = fs::read(path) {
280      b
281    } else {
282      return;
283    };
284    let image = if let Ok(img) = RawImage::decode_from_bytes(&bytes, &mut vec![]) {
285      img
286    } else {
287      return;
288    };
289    self.init_page(path);
290    // let image_id = self.doc.add_image(&image);
291    let image_id = self
292      .doc
293      .lock()
294      .map(|mut doc| doc.add_image(&image))
295      .unwrap();
296    let pg_x_dpi = self.page_dimensions.width.into_pt().into_px(300.0).0;
297    let pg_y_dpi = self.page_dimensions.height.into_pt().into_px(300.0).0;
298
299    let x_scaling = pg_x_dpi as f32 / image.width as f32;
300    let y_scaling = pg_y_dpi as f32 / image.height as f32;
301
302    let scale = f32::min(x_scaling, y_scaling);
303    // If width is significantly bigger than the height, rotate so it's oriented to fill more of the page
304    let rotation = if image.width > (image.height as f32 * 1.25) as usize {
305      Some(XObjectRotation {
306        angle_ccw_degrees: -90.0,
307        rotation_center_x: Px(((image.width as f32 * scale) / 2.0) as usize),
308        rotation_center_y: Px(((image.height as f32 * scale) / 2.0) as usize),
309      })
310    } else {
311      None
312    };
313    self.current_page_contents.push(Op::UseXobject {
314      id: image_id.clone(),
315      transform: XObjectTransform {
316        scale_x: Some(scale),
317        scale_y: Some(scale),
318        rotate: rotation,
319        ..Default::default()
320      },
321    });
322    self.save_page(index);
323  }
324  /// Generates pages for a file
325  pub fn process_file(
326    &mut self,
327    file: &Path,
328    highlighter_config: &HighlighterConfig,
329    index: usize,
330  ) -> Result<(), Box<dyn std::error::Error>> {
331    println!("Generating pages for {}, index {index}", file.display());
332    self.processed_file_count += 1;
333    match file.extension().and_then(OsStr::to_str) {
334      Some("jpg" | "jpeg" | "png" | "ico" | "bmp" | "webp") => {
335        self.generate_image_page(file, index);
336        Ok(())
337      }
338      _ => {
339        let mut highlighter = HighlightFile::new(
340          file,
341          &highlighter_config.syntax_set,
342          &highlighter_config.theme,
343        )?;
344
345        self.generate_highlighted_pages(&mut highlighter, file, highlighter_config, index);
346
347        Ok(())
348      }
349    }
350  }
351  /// Consumes entire walker
352  pub fn process_files(&mut self, walker: Walk, highlighter_config: HighlighterConfig) {
353    for result in walker {
354      match result {
355        Ok(entry) => {
356          if entry.file_type().is_some_and(|f| f.is_file()) {
357            if let Err(err) = self.process_file(entry.path(), &highlighter_config, 0) {
358              println!("ERROR: {}", err)
359            }
360          }
361        }
362        Err(err) => println!("ERROR: {}", err),
363      }
364    }
365  }
366
367  /// Returns number of files processed by [`CodeToPdf::process_files`]
368  pub fn processed_file_count(&self) -> usize {
369    self.processed_file_count
370  }
371}