Skip to main content

gitprint/
lib.rs

1//! # gitprint
2//!
3//! Convert git repositories into syntax-highlighted, printer-friendly PDFs.
4//!
5//! The main entry point is [`run()`], which executes the full pipeline:
6//! git repository inspection, file filtering, syntax highlighting, and PDF generation.
7
8pub mod cli;
9pub mod defaults;
10pub mod filter;
11pub mod git;
12pub mod github;
13pub mod highlight;
14pub mod pdf;
15pub mod preview;
16pub mod types;
17pub mod user_report;
18
19use std::path::{Path, PathBuf};
20use std::sync::Arc;
21
22use anyhow::bail;
23
24use crate::types::{Config, HighlightedLine};
25
/// A processed file ready for PDF rendering.
///
/// Built once per file after highlighting; `run` consumes it to produce both the
/// file's TOC entry and its rendered pages.
struct ProcessedFile {
    /// Path of the file, as yielded by the filtered tracked-file listing.
    path: PathBuf,
    /// Syntax-highlighted lines produced by the highlighter for this file's content.
    lines: Vec<HighlightedLine>,
    /// Number of lines in the file's content (the `str::lines` count).
    line_count: usize,
    /// Pre-formatted size string, computed once to avoid calling format_size twice.
    size_str: String,
    /// Last-modified string looked up in the date map; empty when the path has no entry.
    last_modified: String,
}
35
/// Renders a byte count as a short human-readable string.
///
/// Values below 1024 are printed as whole bytes (`"512 B"`), values below
/// 1024² as kilobytes with one decimal (`"1.5 KB"`), and everything else as
/// megabytes with one decimal (`"2.5 MB"`). Units are binary (1 KB = 1024 B).
pub(crate) fn format_size(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = KIB * KIB;

    match bytes {
        b if b < KIB => format!("{b} B"),
        b if b < MIB => format!("{:.1} KB", b as f64 / KIB as f64),
        b => format!("{:.1} MB", b as f64 / MIB as f64),
    }
}
45
/// Returns the current wall-clock time in UTC, formatted as `YYYY-MM-DD HH:MM:SS UTC`.
///
/// The calendar date is derived from the Unix timestamp with Howard Hinnant's
/// days-to-civil algorithm, so no date/time crate is required. If the system clock
/// reports a time before the Unix epoch, the epoch itself is formatted.
pub(crate) fn format_utc_now() -> String {
    // Converts a count of days since 1970-01-01 into (year, month, day) in the
    // proleptic Gregorian calendar.
    fn civil_from_days(days_since_epoch: i64) -> (i64, i64, i64) {
        // Re-base onto an era that starts on March 1st so the leap day falls at
        // the end of the (shifted) year.
        let shifted = days_since_epoch + 719_468;
        let era = shifted.div_euclid(146_097);
        let day_of_era = shifted - era * 146_097;
        let year_of_era =
            (day_of_era - day_of_era / 1460 + day_of_era / 36524 - day_of_era / 146096) / 365;
        let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
        let shifted_month = (5 * day_of_year + 2) / 153;
        let day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
        let month = if shifted_month < 10 {
            shifted_month + 3
        } else {
            shifted_month - 9
        };
        // January and February belong to the following civil year.
        let year = year_of_era + era * 400 + i64::from(month <= 2);
        (year, month, day)
    }

    let unix_secs = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    };

    let hours = (unix_secs / 3600) % 24;
    let minutes = (unix_secs / 60) % 60;
    let seconds = unix_secs % 60;
    let (year, month, day) = civil_from_days((unix_secs / 86400) as i64);

    format!("{year:04}-{month:02}-{day:02} {hours:02}:{minutes:02}:{seconds:02} UTC")
}
74
/// Formats a duration for the summary line: whole milliseconds (`"250ms"`) when
/// under one second, otherwise seconds with one decimal place (`"1.5s"`).
fn format_elapsed(elapsed: std::time::Duration) -> String {
    let millis = elapsed.as_millis();
    match millis {
        0..=999 => format!("{millis}ms"),
        _ => format!("{:.1}s", elapsed.as_secs_f64()),
    }
}
82
83/// Runs the full gitprint pipeline and writes a PDF to `config.output_path`.
84///
85/// Accepts a single file, a git repository (optionally scoped to a subdirectory),
86/// or a plain directory. The output always goes to `config.output_path`.
87///
88/// # Errors
89///
90/// Returns an error if the path does not exist, git operations fail, the theme is
91/// invalid, or writing the PDF fails.
92///
93/// # Examples
94///
95/// ```ignore
96/// use gitprint::types::{Config, PaperSize};
97/// use std::path::PathBuf;
98///
99/// let config = Config {
100///     repo_path: PathBuf::from("."),
101///     output_path: PathBuf::from("out.pdf"),
102///     // ... other fields
103/// #   include_patterns: vec![],
104/// #   exclude_patterns: vec![],
105/// #   theme: "InspiredGitHub".to_string(),
106/// #   font_size: 8.0,
107/// #   no_line_numbers: false,
108/// #   toc: true,
109/// #   file_tree: true,
110/// #   branch: None,
111/// #   commit: None,
112/// #   paper_size: PaperSize::A4,
113/// #   landscape: false,
114/// };
115/// gitprint::run(&config).await.unwrap();
116/// ```
117///
118/// **Concurrency model**:
119/// - Single-file mode: highlighter init (CPU, `spawn_blocking`) runs concurrently with
120///   file content read and last-modified date fetch (both I/O).
121/// - Multi-file mode: git metadata, tracked-file list, date map, and highlighter init
122///   all run concurrently via `tokio::join!`; highlighter uses `spawn_blocking` to keep
123///   tokio worker threads free for I/O.
124/// - File reads use a tokio `JoinSet` (I/O-bound parallelism).
125/// - Syntax highlighting uses a tokio `JoinSet` of `spawn_blocking` tasks — one per file
126///   — so all files are highlighted concurrently across the blocking thread pool (CPU-bound).
127/// - Cover, TOC, and tree PDF renders are sequential (each < 5 ms; not worth the overhead).
128pub async fn run(config: &Config) -> anyhow::Result<()> {
129    let start = std::time::Instant::now();
130
131    let info = git::verify_repo(&config.repo_path).await?;
132
133    // Single-file mode: no cover page, TOC, or file tree — just render the file.
134    if let Some(ref single_file) = info.single_file {
135        // Highlighter init (CPU, spawn_blocking) overlaps with two I/O calls.
136        let theme = config.theme.clone();
137        let (highlighter_res, content_res, last_modified) = tokio::join!(
138            tokio::task::spawn_blocking(move || highlight::Highlighter::new(&theme)),
139            git::read_file_content(&info.root, single_file, config),
140            git::file_last_modified(&info.root, single_file, config, info.is_git),
141        );
142        let highlighter =
143            highlighter_res.map_err(|e| anyhow::anyhow!("highlighter panicked: {e}"))??;
144        let content = content_res?;
145
146        if filter::is_binary(content.as_bytes()) || filter::is_minified(&content) {
147            bail!("{}: binary or minified file", single_file.display());
148        }
149        let line_count = content.lines().count();
150        let size_str = format_size(content.len() as u64);
151        let lines: Vec<HighlightedLine> =
152            highlighter.highlight_lines(&content, single_file).collect();
153
154        let doc_title = config
155            .remote_url
156            .as_deref()
157            .map(git::repo_name_from_url)
158            .unwrap_or_else(|| {
159                config
160                    .repo_path
161                    .file_name()
162                    .map(|n| n.to_string_lossy().to_string())
163                    .unwrap_or_else(|| "gitprint".to_string())
164            });
165        let mut doc = printpdf::PdfDocument::new(&doc_title);
166        let fonts = pdf::fonts::load_fonts(&mut doc)?;
167        let mut builder = pdf::create_builder(config, fonts);
168        let file_info = format!("{line_count} LOC \u{00B7} {size_str} \u{00B7} {last_modified}");
169        let header_url = config.remote_url.as_ref().map(|url| {
170            let base = url.trim_end_matches(".git");
171            format!("{base}/blob/HEAD/{}", single_file.display())
172        });
173        pdf::code::render_file(
174            &mut builder,
175            &single_file.display().to_string(),
176            lines.into_iter(),
177            line_count,
178            !config.no_line_numbers,
179            config.font_size as u8,
180            &file_info,
181            header_url.as_deref(),
182        );
183        let pages = builder.finish();
184        let total_pages = pages.len();
185        doc.with_pages(pages);
186        pdf::save_pdf(&doc, &config.output_path).await?;
187
188        let elapsed = start.elapsed();
189        let pdf_size = tokio::fs::metadata(&config.output_path)
190            .await
191            .map(|m| m.len())
192            .unwrap_or(0);
193        eprintln!(
194            "{} — 1 file, {} pages, {}, {}",
195            config.output_path.display(),
196            total_pages,
197            format_size(pdf_size),
198            format_elapsed(elapsed),
199        );
200        return Ok(());
201    }
202
203    let repo_path = info.root;
204    let is_git = info.is_git;
205    let scope = info.scope;
206
207    // Parallel: git metadata + tracked file list + date map + highlighter init
208    // + fs owner/group + repo disk size (for local paths).
209    // Highlighter::new is CPU-bound (syntect deserialization); spawn_blocking keeps
210    // tokio worker threads free for the concurrent I/O-bound git calls.
211    let theme = config.theme.clone();
212    let fs_path = config.repo_path.clone();
213    let fs_path2 = repo_path.clone();
214    let is_remote = config.remote_url.is_some();
215    let generated_at = format_utc_now();
216    let repo_path_for_git_size = repo_path.clone();
217    let config_for_git_size = config.clone();
218    let (
219        metadata_res,
220        all_paths_res,
221        date_map_res,
222        highlighter_res,
223        fs_owner_group,
224        git_repo_size,
225        fs_size,
226    ) = tokio::join!(
227        git::get_metadata(&repo_path, config, is_git, scope.as_deref()),
228        git::list_tracked_files(&repo_path, config, is_git, scope.as_deref()),
229        git::file_last_modified_dates(&repo_path, config, is_git, scope.as_deref()),
230        tokio::task::spawn_blocking(move || highlight::Highlighter::new(&theme)),
231        async move {
232            if is_remote {
233                (None, None)
234            } else {
235                git::fs_owner_group(&fs_path).await
236            }
237        },
238        async move {
239            if is_git {
240                git::git_tracked_size(&repo_path_for_git_size, &config_for_git_size).await
241            } else {
242                String::new()
243            }
244        },
245        async move {
246            if is_remote {
247                String::new()
248            } else {
249                git::fs_dir_size(&fs_path2).await
250            }
251        },
252    );
253
254    let mut metadata = metadata_res?;
255    if let Some(ref url) = config.remote_url {
256        metadata.name = git::repo_name_from_url(url);
257    }
258    metadata.fs_owner = fs_owner_group.0;
259    metadata.fs_group = fs_owner_group.1;
260    metadata.generated_at = generated_at;
261    metadata.repo_size = git_repo_size;
262    metadata.fs_size = fs_size;
263    if !is_remote {
264        metadata.repo_absolute_path = Some(repo_path.clone());
265    }
266    let highlighter =
267        Arc::new(highlighter_res.map_err(|e| anyhow::anyhow!("highlighter panicked: {e}"))??);
268    let date_map = Arc::new(date_map_res?);
269
270    let file_filter = filter::FileFilter::new(&config.include_patterns, &config.exclude_patterns)?;
271    let mut paths: Vec<_> = file_filter.filter_paths(all_paths_res?).collect();
272    paths.sort_unstable();
273
274    // Phase 1 — I/O: read all file contents concurrently with tokio.
275    let mut read_set: tokio::task::JoinSet<Option<(PathBuf, String, String)>> =
276        tokio::task::JoinSet::new();
277    paths.into_iter().for_each(|path| {
278        let repo = repo_path.clone();
279        let cfg = config.clone();
280        let dates = Arc::clone(&date_map);
281        read_set.spawn(async move {
282            let content = read_text_file(&repo, &path, &cfg).await?;
283            let last_modified = dates.get(&path).cloned().unwrap_or_default();
284            Some((path, content, last_modified))
285        });
286    });
287    let raw_files: Vec<(PathBuf, String, String)> =
288        read_set.join_all().await.into_iter().flatten().collect();
289
290    // Phase 2 — CPU: highlight each file in a dedicated blocking task so all files
291    // are processed concurrently across tokio's blocking thread pool.
292    let mut highlight_set: tokio::task::JoinSet<ProcessedFile> = tokio::task::JoinSet::new();
293    raw_files
294        .into_iter()
295        .for_each(|(path, content, last_modified)| {
296            let hl = Arc::clone(&highlighter);
297            highlight_set.spawn_blocking(move || {
298                let line_count = content.lines().count();
299                let size_str = format_size(content.len() as u64);
300                let lines: Vec<HighlightedLine> = hl.highlight_lines(&content, &path).collect();
301                ProcessedFile {
302                    path,
303                    lines,
304                    line_count,
305                    size_str,
306                    last_modified,
307                }
308            });
309        });
310    let mut files: Vec<ProcessedFile> = highlight_set.join_all().await;
311
312    files.sort_unstable_by(|a, b| a.path.cmp(&b.path));
313
314    metadata.file_count = files.len();
315    metadata.total_lines = files.iter().map(|f| f.line_count).sum();
316
317    // Build PDF document and load fonts once.
318    let mut doc = printpdf::PdfDocument::new(&metadata.name);
319    let fonts = pdf::fonts::load_fonts(&mut doc)?;
320
321    // Collect paths and build dummy TOC entries before the parallel render phase.
322    let tree_paths: Vec<PathBuf> = files.iter().map(|f| f.path.clone()).collect();
323
324    // Dummy TOC entries (start_page=0) used purely to count how many pages the TOC occupies.
325    // Each entry is one line regardless of content, so page count is stable.
326    let dummy_toc_entries: Vec<pdf::toc::TocEntry> = files
327        .iter()
328        .map(|f| pdf::toc::TocEntry {
329            path: f.path.clone(),
330            line_count: f.line_count,
331            size_str: f.size_str.clone(),
332            last_modified: f.last_modified.clone(),
333            start_page: 0,
334        })
335        .collect();
336
337    // For cover links: use explicit remote_url from CLI, or fall back to remote detected
338    // from git config so links work even when printing a local repo without --remote.
339    let effective_remote_url = config
340        .remote_url
341        .as_deref()
342        .or(metadata.detected_remote_url.as_deref());
343
344    let cover_pages = {
345        let mut b = pdf::create_builder(config, fonts.clone());
346        pdf::cover::render(&mut b, &metadata, effective_remote_url);
347        b.finish()
348    };
349    let toc_count = if config.toc {
350        let mut b = pdf::create_builder(config, fonts.clone());
351        pdf::toc::render(&mut b, &dummy_toc_entries);
352        b.finish().len()
353    } else {
354        0
355    };
356    let tree_count = if config.file_tree {
357        let mut b = pdf::create_builder(config, fonts.clone());
358        pdf::tree::render(&mut b, &tree_paths);
359        b.finish().len()
360    } else {
361        0
362    };
363    let cover_count = cover_pages.len();
364
365    // Render file content sequentially, tracking each file's starting page.
366    let file_base_page = cover_count + toc_count + tree_count + 1;
367    let mut content_builder = pdf::create_builder_at_page(config, fonts.clone(), file_base_page);
368    let mut toc_entries: Vec<pdf::toc::TocEntry> = Vec::with_capacity(files.len());
369
370    let remote_base = config.remote_url.as_ref().map(|url| {
371        let base = url.trim_end_matches(".git");
372        let commit = if metadata.commit_hash.is_empty() {
373            "HEAD"
374        } else {
375            &metadata.commit_hash
376        };
377        format!("{base}/blob/{commit}")
378    });
379
380    files.into_iter().for_each(|file| {
381        let start_page = content_builder.current_page();
382        let info = format!(
383            "{} LOC \u{00B7} {} \u{00B7} {}",
384            file.line_count, file.size_str, file.last_modified
385        );
386        toc_entries.push(pdf::toc::TocEntry {
387            path: file.path.clone(),
388            line_count: file.line_count,
389            size_str: file.size_str,
390            last_modified: file.last_modified.clone(),
391            start_page,
392        });
393        let header_url = remote_base
394            .as_ref()
395            .map(|base| format!("{base}/{}", file.path.display()));
396        pdf::code::render_file(
397            &mut content_builder,
398            &file.path.display().to_string(),
399            file.lines.into_iter(),
400            file.line_count,
401            !config.no_line_numbers,
402            config.font_size as u8,
403            &info,
404            header_url.as_deref(),
405        );
406    });
407    let content_pages = content_builder.finish();
408
409    let toc_pages = if config.toc {
410        let mut b = pdf::create_builder_at_page(config, fonts.clone(), cover_count + 1);
411        pdf::toc::render(&mut b, &toc_entries);
412        b.finish()
413    } else {
414        vec![]
415    };
416    let tree_pages = if config.file_tree {
417        let mut b = pdf::create_builder_at_page(config, fonts.clone(), cover_count + toc_count + 1);
418        pdf::tree::render(&mut b, &tree_paths);
419        b.finish()
420    } else {
421        vec![]
422    };
423
424    // Assemble final document: cover → TOC → tree → file content.
425    let all_pages: Vec<_> = cover_pages
426        .into_iter()
427        .chain(toc_pages)
428        .chain(tree_pages)
429        .chain(content_pages)
430        .collect();
431    let total_pages = all_pages.len();
432
433    doc.with_pages(all_pages);
434    pdf::save_pdf(&doc, &config.output_path).await?;
435
436    let elapsed = start.elapsed();
437    let pdf_size = tokio::fs::metadata(&config.output_path)
438        .await
439        .map(|m| m.len())
440        .unwrap_or(0);
441
442    eprintln!(
443        "{} — {} files, {} pages, {}, {}",
444        config.output_path.display(),
445        metadata.file_count,
446        total_pages,
447        format_size(pdf_size),
448        format_elapsed(elapsed),
449    );
450
451    Ok(())
452}
453
454async fn read_text_file(repo_path: &Path, path: &Path, config: &Config) -> Option<String> {
455    git::read_file_content(repo_path, path, config)
456        .await
457        .ok()
458        .filter(|c| !filter::is_binary(c.as_bytes()))
459        .filter(|c| !filter::is_minified(c))
460}