c2pdf/
code_to_pdf.rs

//! Contains the [`HighlighterConfig`], [`DocumentSubset`] and [`CodeToPdf`] structs
2
3use std::{
4  cmp::Ordering,
5  collections::BTreeMap,
6  ffi::OsStr,
7  fs,
8  io::BufRead,
9  mem,
10  path::Path,
11  sync::{Arc, Mutex},
12};
13
14use ignore::Walk;
15use log::error;
16use printpdf::{
17  FontId, Op, PdfDocument, PdfPage, Pt, Px, RawImage, TextItem, XObject, XObjectId,
18  XObjectRotation, XObjectTransform, color,
19};
20use syntect::{
21  easy::HighlightFile,
22  highlighting::{Color, Style, Theme},
23  parsing::SyntaxSet,
24};
25
26use crate::{
27  dimensions::Dimensions,
28  helpers::{ProcessedText, init_page},
29  text_manipulation::TextWrapper,
30};
31
32/// Configuration struct for the highlighter ([`syntect`])
33///
34/// Contains the desired theme, syntax set, and the maximum line length to highlight
35pub struct HighlighterConfig {
36  syntax_set: SyntaxSet,
37  theme: Theme,
38  max_line_len_to_highlight: usize,
39}
40impl HighlighterConfig {
41  /// Initialises new [`HighlighterConfig`]
42  pub fn new(syntax_set: SyntaxSet, theme: Theme) -> Self {
43    Self {
44      syntax_set,
45      theme,
46      max_line_len_to_highlight: 20_000,
47    }
48  }
49}
50/// Subset of `PdfDocument`. Created as some types within `PdfDocument` weren't sync so it couldn't be used with `rayon`
51#[derive(Default)]
52pub struct DocumentSubset {
53  x_object_map: BTreeMap<XObjectId, XObject>,
54  // font_map: Arc<Mutex<BTreeMap<FontId, ParsedFont>>>,
55  pages: Vec<(PdfPage, usize)>,
56}
57impl DocumentSubset {
58  /// Add an image
59  pub fn add_image(&mut self, image: &RawImage) -> XObjectId {
60    let id = XObjectId::new();
61    self
62      .x_object_map
63      .insert(id.clone(), XObject::Image(image.clone()));
64    id
65  }
66  /// Append everything from the `DocumentSubset` into the actual PdfDocument
67  pub fn to_document(&mut self, doc: &mut PdfDocument) {
68    let x_obj_map = mem::take(&mut self.x_object_map);
69    doc.resources.xobjects.map = x_obj_map;
70    let mut pages = mem::take(&mut self.pages);
71    // Sort into order from the walker
72    // This brings back determinism :)
73    pages.sort_by(|a, b| {
74      let ia = a.1;
75      let ib = b.1;
76      if ia > ib {
77        Ordering::Greater
78      } else if ia < ib {
79        Ordering::Less
80      } else {
81        Ordering::Equal
82      }
83    });
84    doc.pages = pages.into_iter().map(|f| f.0).collect();
85  }
86}
87fn to_rgb(c: Color) -> color::Rgb {
88  color::Rgb {
89    r: (c.r as f32) / 255.0,
90    g: (c.g as f32) / 255.0,
91    b: (c.b as f32) / 255.0,
92    icc_profile: None,
93  }
94}
95
/// Main struct for generating PDFs.
/// It handles almost the entire process of reading and highlighting code,
/// as well as actually writing it to the PDF
pub struct CodeToPdf {
  /// Operations collected for the page currently being built; moved into a `PdfPage` when the page is saved
  current_page_contents: Vec<Op>,
  /// Shared store that finished pages and decoded images are pushed into
  doc: Arc<Mutex<DocumentSubset>>,
  /// Font used for every piece of text written by this struct
  font_id: FontId,
  /// Page size, also queried for the maximum text width and height
  page_dimensions: Dimensions,
  /// Measures text and splits it into lines; also supplies the font size
  text_wrapper: TextWrapper,
  /// Number of times [`CodeToPdf::process_file`] has been called
  processed_file_count: usize,
  /// Forwarded to the page initialiser; presumably controls whether the file path is printed (confirm in `helpers::init_page`)
  include_path: bool,
  /// Text to put at the top of every page
  page_text: Option<ProcessedText>,
}
110impl CodeToPdf {
111  /// Initialises a new [`CodeToPdf`]
112  pub fn new(
113    doc: Arc<Mutex<DocumentSubset>>,
114    font_id: FontId,
115    page_dimensions: Dimensions,
116    text_wrapper: TextWrapper,
117    page_text: Option<ProcessedText>,
118    include_path: bool,
119  ) -> Self {
120    Self {
121      current_page_contents: vec![],
122      doc,
123      font_id,
124      page_dimensions,
125      text_wrapper,
126      processed_file_count: 0,
127      page_text,
128      include_path,
129    }
130  }
131  /// Saves the current page contents to the document, and clears [`CodeToPdf::current_page_contents`]
132  fn save_page(&mut self, index: usize) {
133    self.current_page_contents.push(Op::EndTextSection);
134    let contents = std::mem::take(&mut self.current_page_contents);
135    let page = PdfPage::new(
136      self.page_dimensions.width,
137      self.page_dimensions.height,
138      contents,
139    );
140    _ = self.doc.lock().map(|mut doc| {
141      doc.pages.push((page, index));
142    });
143    // self.doc.pages.push(page);
144  }
145
146  /// Initialises [`CodeToPdf::current_page_contents`] with basic contents
147  fn init_page(&mut self, path: &Path) {
148    // Should never be called on a non-empty current_pages_contents, so check it in debug mode
149    debug_assert_eq!(self.current_page_contents.len(), 0);
150
151    init_page(
152      &mut self.current_page_contents,
153      &self.page_dimensions,
154      self.font_id.clone(),
155      self.text_wrapper.font_size(),
156      path,
157      self.page_text.as_ref(),
158      self.include_path,
159      &mut self.text_wrapper,
160    );
161  }
162  /// Computes maximum number of lines that can be displayed on a page
163  fn max_line_count(&self) -> u32 {
164    let max_height = self.page_dimensions.max_text_height();
165    ((max_height).into_pt().0 / (self.text_wrapper.font_size() * 1.2)).floor() as u32
166  }
167  /// Increment given line_count. Begin a new page if it's too high
168  /// Returns `true` if a new page is created
169  fn increment_line_count(
170    &mut self,
171    line_count: &mut u32,
172    path: &Path,
173    index: usize,
174    has_added_text: &mut bool,
175  ) -> bool {
176    *line_count += 1;
177    if *line_count > self.max_line_count() {
178      self.save_page(index);
179      self.init_page(path);
180      *has_added_text = false;
181      *line_count = 0;
182      true
183    } else {
184      false
185    }
186  }
187  /// Generates all the pages for a file
188  fn generate_highlighted_pages(
189    &mut self,
190    highlighter: &mut HighlightFile,
191    path: &Path,
192    highlighter_config: &HighlighterConfig,
193    index: usize,
194  ) {
195    let mut line = String::new();
196    let mut line_count = 0;
197    self.init_page(path);
198    let mut has_added_text = false;
199    let mut prev_colour = Color::BLACK;
200    while highlighter.reader.read_line(&mut line).unwrap_or(0) > 0 {
201      has_added_text = true;
202      // Store the char count for the current line
203      let mut line_width = 0.0;
204      let regions: &[(Style, &str)] = if line.len() < highlighter_config.max_line_len_to_highlight {
205        &highlighter
206          .highlight_lines
207          .highlight_line(&line, &highlighter_config.syntax_set)
208          .unwrap()
209      } else {
210        &[(
211          Style {
212            foreground: Color::BLACK,
213            background: Color::WHITE,
214            font_style: syntect::highlighting::FontStyle::default(),
215          },
216          &line,
217        )]
218      };
219      for (style, text) in regions {
220        let text_width = self.text_wrapper.get_width(text).0;
221
222        let line_width_remaining = self.page_dimensions.max_text_width().into_pt().0 - line_width;
223
224        let text_colour = style.foreground;
225        // Set PDF text colour if the colour of the current region is different to the colour of the previous region
226        if text_colour != prev_colour {
227          self.current_page_contents.push(Op::SetFillColor {
228            col: color::Color::Rgb(to_rgb(text_colour)),
229          });
230          prev_colour = text_colour;
231        }
232        // Split into lines, with the length of the first line being the length remaining on the current line
233        let lines = self.text_wrapper.split_into_lines(text, |i| match i {
234          0 => Pt(line_width_remaining),
235          _ => self.page_dimensions.max_text_width().into_pt(),
236        });
237        match lines.len() {
238          // If only a single line, then no new lines are going to be made (as we're processing a region here)
239          1 => {
240            self.current_page_contents.push(Op::WriteText {
241              items: vec![TextItem::Text(
242                text
243                  .trim_end_matches(|x| (x == '\n') | (x == '\r'))
244                  .to_string(),
245              )],
246              font: self.font_id.clone(),
247            });
248            line_width += text_width;
249          }
250          // If the region is too long to fit onto the current line, write to multiple different lines
251          _ => {
252            let mut first = true;
253            for (l, width) in lines {
254              if !first {
255                self.current_page_contents.push(Op::AddLineBreak);
256                line_width = 0.0;
257              }
258              first = false;
259              line_width += width;
260              self.current_page_contents.push(Op::WriteText {
261                items: vec![TextItem::Text(l)],
262                font: self.font_id.clone(),
263              });
264              self.increment_line_count(&mut line_count, path, index, &mut has_added_text);
265            }
266          }
267        }
268      }
269
270      if !self.increment_line_count(&mut line_count, path, index, &mut has_added_text) {
271        self.current_page_contents.push(Op::AddLineBreak);
272      }
273      line.clear();
274    }
275    // Clear page if no text has been added to it
276    if has_added_text {
277      self.save_page(index);
278    } else {
279      self.current_page_contents.clear()
280    }
281  }
282
283  /// Generates a page containing the image at the path given
284  fn generate_image_page(&mut self, path: &Path, index: usize) {
285    let bytes = if let Ok(b) = fs::read(path) {
286      b
287    } else {
288      return;
289    };
290    let image = if let Ok(img) = RawImage::decode_from_bytes(&bytes, &mut vec![]) {
291      img
292    } else {
293      return;
294    };
295    self.init_page(path);
296    // let image_id = self.doc.add_image(&image);
297    let image_id = self
298      .doc
299      .lock()
300      .map(|mut doc| doc.add_image(&image))
301      .unwrap();
302    let pg_x_dpi = self.page_dimensions.width.into_pt().into_px(300.0).0;
303    let pg_y_dpi = self.page_dimensions.height.into_pt().into_px(300.0).0;
304
305    let x_scaling = pg_x_dpi as f32 / image.width as f32;
306    let y_scaling = pg_y_dpi as f32 / image.height as f32;
307
308    let scale = f32::min(x_scaling, y_scaling);
309    // If width is significantly bigger than the height, rotate so it's oriented to fill more of the page
310    let rotation = if image.width > (image.height as f32 * 1.25) as usize {
311      Some(XObjectRotation {
312        angle_ccw_degrees: -90.0,
313        rotation_center_x: Px(((image.width as f32 * scale) / 2.0) as usize),
314        rotation_center_y: Px(((image.height as f32 * scale) / 2.0) as usize),
315      })
316    } else {
317      None
318    };
319
320    self.current_page_contents.push(Op::UseXobject {
321      id: image_id.clone(),
322      transform: XObjectTransform {
323        scale_x: Some(scale),
324        scale_y: Some(scale),
325        rotate: rotation,
326        ..Default::default()
327      },
328    });
329    self.save_page(index);
330  }
331  /// Generates pages for a file
332  pub fn process_file(
333    &mut self,
334    file: &Path,
335    highlighter_config: &HighlighterConfig,
336    index: usize,
337  ) -> Result<(), Box<dyn std::error::Error>> {
338    self.processed_file_count += 1;
339    match file.extension().and_then(OsStr::to_str) {
340      Some("jpg" | "jpeg" | "png" | "ico" | "bmp" | "webp") => {
341        self.generate_image_page(file, index);
342        Ok(())
343      }
344      _ => {
345        let mut highlighter = HighlightFile::new(
346          file,
347          &highlighter_config.syntax_set,
348          &highlighter_config.theme,
349        )?;
350
351        self.generate_highlighted_pages(&mut highlighter, file, highlighter_config, index);
352
353        Ok(())
354      }
355    }
356  }
357  /// Consumes entire walker
358  pub fn process_files(&mut self, walker: Walk, highlighter_config: HighlighterConfig) {
359    for result in walker {
360      match result {
361        Ok(entry) => {
362          if entry.file_type().is_some_and(|f| f.is_file()) {
363            if let Err(err) = self.process_file(entry.path(), &highlighter_config, 0) {
364              error!("ERROR: {}", err)
365            }
366          }
367        }
368        Err(err) => error!("ERROR: {}", err),
369      }
370    }
371  }
372
  /// Returns the number of times [`CodeToPdf::process_file`] has been called
  ///
  /// The counter is incremented at the start of each call, so files that later fail to open
  /// are counted as well. Files handled through [`CodeToPdf::process_files`] are included.
  pub fn processed_file_count(&self) -> usize {
    self.processed_file_count
  }
377}