Skip to main content

gitprint/
lib.rs

1//! # gitprint
2//!
3//! Convert git repositories into syntax-highlighted, printer-friendly PDFs.
4//!
5//! The main entry point is [`run()`], which executes the full pipeline:
6//! git repository inspection, file filtering, syntax highlighting, and PDF generation.
7
8pub mod cli;
9pub mod defaults;
10pub mod filter;
11pub mod git;
12pub mod highlight;
13pub mod pdf;
14pub mod types;
15
16use std::path::{Path, PathBuf};
17use std::sync::Arc;
18
19use anyhow::bail;
20
21use crate::types::{Config, HighlightedLine};
22
23/// A processed file ready for PDF rendering.
24struct ProcessedFile {
25    path: PathBuf,
26    lines: Vec<HighlightedLine>,
27    line_count: usize,
28    /// Pre-formatted size string, computed once to avoid calling format_size twice.
29    size_str: String,
30    last_modified: String,
31}
32
/// Formats a byte count as a short human-readable string.
///
/// Uses binary multiples: plain bytes below 1024 (`"512 B"`), then kilobytes and
/// megabytes with one decimal place (`"1.5 KB"`, `"5.0 MB"`). There is no larger
/// unit, so very large values are still reported in MB.
///
/// The unit is chosen from the value *as it will be displayed*: a size that would
/// round up to `1024.0 KB` is reported as `1.0 MB` instead.
fn format_size(bytes: u64) -> String {
    const UNIT: f64 = 1024.0;
    // `{:.1}` rounds to one decimal, so any KB value at or above this threshold
    // would print as "1024.0 KB"; such sizes are promoted to MB.
    const KB_DISPLAY_LIMIT: f64 = 1023.95;

    if bytes < 1024 {
        return format!("{bytes} B");
    }

    let kilobytes = bytes as f64 / UNIT;
    if kilobytes < KB_DISPLAY_LIMIT {
        format!("{kilobytes:.1} KB")
    } else {
        format!("{:.1} MB", bytes as f64 / (UNIT * UNIT))
    }
}
42
/// Formats a duration for the completion summary.
///
/// Durations under one second are shown as whole milliseconds (`"42ms"`);
/// anything longer is shown as seconds with one decimal place (`"1.5s"`).
fn format_elapsed(elapsed: std::time::Duration) -> String {
    match elapsed.as_millis() {
        millis if millis < 1000 => format!("{millis}ms"),
        _ => format!("{:.1}s", elapsed.as_secs_f64()),
    }
}
50
51/// Runs the full gitprint pipeline and writes a PDF to `config.output_path`.
52///
53/// Accepts a single file, a git repository (optionally scoped to a subdirectory),
54/// or a plain directory. The output always goes to `config.output_path`.
55///
56/// # Errors
57///
58/// Returns an error if the path does not exist, git operations fail, the theme is
59/// invalid, or writing the PDF fails.
60///
61/// # Examples
62///
63/// ```ignore
64/// use gitprint::types::{Config, PaperSize};
65/// use std::path::PathBuf;
66///
67/// let config = Config {
68///     repo_path: PathBuf::from("."),
69///     output_path: PathBuf::from("out.pdf"),
70///     // ... other fields
71/// #   include_patterns: vec![],
72/// #   exclude_patterns: vec![],
73/// #   theme: "InspiredGitHub".to_string(),
74/// #   font_size: 8.0,
75/// #   no_line_numbers: false,
76/// #   toc: true,
77/// #   file_tree: true,
78/// #   branch: None,
79/// #   commit: None,
80/// #   paper_size: PaperSize::A4,
81/// #   landscape: false,
82/// };
83/// gitprint::run(&config).await.unwrap();
84/// ```
85///
86/// **Concurrency model**:
87/// - Single-file mode: highlighter init (CPU, `spawn_blocking`) runs concurrently with
88///   file content read and last-modified date fetch (both I/O).
89/// - Multi-file mode: git metadata, tracked-file list, date map, and highlighter init
90///   all run concurrently via `tokio::join!`; highlighter uses `spawn_blocking` to keep
91///   tokio worker threads free for I/O.
92/// - File reads use a tokio `JoinSet` (I/O-bound parallelism).
93/// - Syntax highlighting uses a tokio `JoinSet` of `spawn_blocking` tasks — one per file
94///   — so all files are highlighted concurrently across the blocking thread pool (CPU-bound).
95/// - Cover, TOC, and tree PDF renders are sequential (each < 5 ms; not worth the overhead).
96pub async fn run(config: &Config) -> anyhow::Result<()> {
97    let start = std::time::Instant::now();
98
99    let info = git::verify_repo(&config.repo_path).await?;
100
101    // Single-file mode: no cover page, TOC, or file tree — just render the file.
102    if let Some(ref single_file) = info.single_file {
103        // Highlighter init (CPU, spawn_blocking) overlaps with two I/O calls.
104        let theme = config.theme.clone();
105        let (highlighter_res, content_res, last_modified) = tokio::join!(
106            tokio::task::spawn_blocking(move || highlight::Highlighter::new(&theme)),
107            git::read_file_content(&info.root, single_file, config),
108            git::file_last_modified(&info.root, single_file, config, info.is_git),
109        );
110        let highlighter =
111            highlighter_res.map_err(|e| anyhow::anyhow!("highlighter panicked: {e}"))??;
112        let content = content_res?;
113
114        if filter::is_binary(content.as_bytes()) || filter::is_minified(&content) {
115            bail!("{}: binary or minified file", single_file.display());
116        }
117        let line_count = content.lines().count();
118        let size_str = format_size(content.len() as u64);
119        let lines: Vec<HighlightedLine> =
120            highlighter.highlight_lines(&content, single_file).collect();
121
122        let mut doc = printpdf::PdfDocument::new("gitprint");
123        let fonts = pdf::fonts::load_fonts(&mut doc)?;
124        let mut builder = pdf::create_builder(config, fonts);
125        let file_info = format!("{line_count} LOC \u{00B7} {size_str} \u{00B7} {last_modified}");
126        pdf::code::render_file(
127            &mut builder,
128            &single_file.display().to_string(),
129            lines.into_iter(),
130            line_count,
131            !config.no_line_numbers,
132            config.font_size as u8,
133            &file_info,
134        );
135        let pages = builder.finish();
136        let total_pages = pages.len();
137        doc.with_pages(pages);
138        pdf::save_pdf(&doc, &config.output_path).await?;
139
140        let elapsed = start.elapsed();
141        let pdf_size = tokio::fs::metadata(&config.output_path)
142            .await
143            .map(|m| m.len())
144            .unwrap_or(0);
145        eprintln!(
146            "{} — 1 file, {} pages, {}, {}",
147            config.output_path.display(),
148            total_pages,
149            format_size(pdf_size),
150            format_elapsed(elapsed),
151        );
152        return Ok(());
153    }
154
155    let repo_path = info.root;
156    let is_git = info.is_git;
157    let scope = info.scope;
158
159    // Parallel: git metadata + tracked file list + date map + highlighter init.
160    // Highlighter::new is CPU-bound (syntect deserialization); spawn_blocking keeps
161    // tokio worker threads free for the concurrent I/O-bound git calls.
162    let theme = config.theme.clone();
163    let (metadata_res, all_paths_res, date_map_res, highlighter_res) = tokio::join!(
164        git::get_metadata(&repo_path, config, is_git, scope.as_deref()),
165        git::list_tracked_files(&repo_path, config, is_git, scope.as_deref()),
166        git::file_last_modified_dates(&repo_path, config, is_git, scope.as_deref()),
167        tokio::task::spawn_blocking(move || highlight::Highlighter::new(&theme)),
168    );
169
170    let mut metadata = metadata_res?;
171    let highlighter =
172        Arc::new(highlighter_res.map_err(|e| anyhow::anyhow!("highlighter panicked: {e}"))??);
173    let date_map = Arc::new(date_map_res?);
174
175    let file_filter = filter::FileFilter::new(&config.include_patterns, &config.exclude_patterns)?;
176    let mut paths: Vec<_> = file_filter.filter_paths(all_paths_res?).collect();
177    paths.sort_unstable();
178
179    // Phase 1 — I/O: read all file contents concurrently with tokio.
180    let mut read_set: tokio::task::JoinSet<Option<(PathBuf, String, String)>> =
181        tokio::task::JoinSet::new();
182    paths.into_iter().for_each(|path| {
183        let repo = repo_path.clone();
184        let cfg = config.clone();
185        let dates = Arc::clone(&date_map);
186        read_set.spawn(async move {
187            let content = read_text_file(&repo, &path, &cfg).await?;
188            let last_modified = dates.get(&path).cloned().unwrap_or_default();
189            Some((path, content, last_modified))
190        });
191    });
192    let raw_files: Vec<(PathBuf, String, String)> =
193        read_set.join_all().await.into_iter().flatten().collect();
194
195    // Phase 2 — CPU: highlight each file in a dedicated blocking task so all files
196    // are processed concurrently across tokio's blocking thread pool.
197    let mut highlight_set: tokio::task::JoinSet<ProcessedFile> = tokio::task::JoinSet::new();
198    raw_files
199        .into_iter()
200        .for_each(|(path, content, last_modified)| {
201            let hl = Arc::clone(&highlighter);
202            highlight_set.spawn_blocking(move || {
203                let line_count = content.lines().count();
204                let size_str = format_size(content.len() as u64);
205                let lines: Vec<HighlightedLine> = hl.highlight_lines(&content, &path).collect();
206                ProcessedFile {
207                    path,
208                    lines,
209                    line_count,
210                    size_str,
211                    last_modified,
212                }
213            });
214        });
215    let mut files: Vec<ProcessedFile> = highlight_set.join_all().await;
216
217    files.sort_unstable_by(|a, b| a.path.cmp(&b.path));
218
219    metadata.file_count = files.len();
220    metadata.total_lines = files.iter().map(|f| f.line_count).sum();
221
222    // Build PDF document and load fonts once.
223    let mut doc = printpdf::PdfDocument::new("gitprint");
224    let fonts = pdf::fonts::load_fonts(&mut doc)?;
225
226    // Collect paths and build dummy TOC entries before the parallel render phase.
227    let tree_paths: Vec<PathBuf> = files.iter().map(|f| f.path.clone()).collect();
228
229    // Dummy TOC entries (start_page=0) used purely to count how many pages the TOC occupies.
230    // Each entry is one line regardless of content, so page count is stable.
231    let dummy_toc_entries: Vec<pdf::toc::TocEntry> = files
232        .iter()
233        .map(|f| pdf::toc::TocEntry {
234            path: f.path.clone(),
235            line_count: f.line_count,
236            size_str: f.size_str.clone(),
237            last_modified: f.last_modified.clone(),
238            start_page: 0,
239        })
240        .collect();
241
242    let cover_pages = {
243        let mut b = pdf::create_builder(config, fonts.clone());
244        pdf::cover::render(&mut b, &metadata);
245        b.finish()
246    };
247    let toc_count = if config.toc {
248        let mut b = pdf::create_builder(config, fonts.clone());
249        pdf::toc::render(&mut b, &dummy_toc_entries);
250        b.finish().len()
251    } else {
252        0
253    };
254    let tree_count = if config.file_tree {
255        let mut b = pdf::create_builder(config, fonts.clone());
256        pdf::tree::render(&mut b, &tree_paths);
257        b.finish().len()
258    } else {
259        0
260    };
261    let cover_count = cover_pages.len();
262
263    // Render file content sequentially, tracking each file's starting page.
264    let file_base_page = cover_count + toc_count + tree_count + 1;
265    let mut content_builder = pdf::create_builder_at_page(config, fonts.clone(), file_base_page);
266    let mut toc_entries: Vec<pdf::toc::TocEntry> = Vec::with_capacity(files.len());
267
268    files.into_iter().for_each(|file| {
269        let start_page = content_builder.current_page();
270        let info = format!(
271            "{} LOC \u{00B7} {} \u{00B7} {}",
272            file.line_count, file.size_str, file.last_modified
273        );
274        toc_entries.push(pdf::toc::TocEntry {
275            path: file.path.clone(),
276            line_count: file.line_count,
277            size_str: file.size_str,
278            last_modified: file.last_modified.clone(),
279            start_page,
280        });
281        pdf::code::render_file(
282            &mut content_builder,
283            &file.path.display().to_string(),
284            file.lines.into_iter(),
285            file.line_count,
286            !config.no_line_numbers,
287            config.font_size as u8,
288            &info,
289        );
290    });
291    let content_pages = content_builder.finish();
292
293    let toc_pages = if config.toc {
294        let mut b = pdf::create_builder_at_page(config, fonts.clone(), cover_count + 1);
295        pdf::toc::render(&mut b, &toc_entries);
296        b.finish()
297    } else {
298        vec![]
299    };
300    let tree_pages = if config.file_tree {
301        let mut b = pdf::create_builder_at_page(config, fonts.clone(), cover_count + toc_count + 1);
302        pdf::tree::render(&mut b, &tree_paths);
303        b.finish()
304    } else {
305        vec![]
306    };
307
308    // Assemble final document: cover → TOC → tree → file content.
309    let all_pages: Vec<_> = cover_pages
310        .into_iter()
311        .chain(toc_pages)
312        .chain(tree_pages)
313        .chain(content_pages)
314        .collect();
315    let total_pages = all_pages.len();
316
317    doc.with_pages(all_pages);
318    pdf::save_pdf(&doc, &config.output_path).await?;
319
320    let elapsed = start.elapsed();
321    let pdf_size = tokio::fs::metadata(&config.output_path)
322        .await
323        .map(|m| m.len())
324        .unwrap_or(0);
325
326    eprintln!(
327        "{} — {} files, {} pages, {}, {}",
328        config.output_path.display(),
329        metadata.file_count,
330        total_pages,
331        format_size(pdf_size),
332        format_elapsed(elapsed),
333    );
334
335    Ok(())
336}
337
338async fn read_text_file(repo_path: &Path, path: &Path, config: &Config) -> Option<String> {
339    git::read_file_content(repo_path, path, config)
340        .await
341        .ok()
342        .filter(|c| !filter::is_binary(c.as_bytes()))
343        .filter(|c| !filter::is_minified(c))
344}