1use std::path::{Path, PathBuf};
11
12use fleischwolf_pdf::Pipeline;
13
/// File extensions (without the leading dot) that are treated as images.
const IMAGE_EXTS: &[&str] = &["png", "jpg", "jpeg", "tif", "tiff", "bmp", "gif", "webp"];

/// Reports whether `p` names a file this tool knows how to convert.
///
/// A path qualifies when its extension is exactly `pdf`, `gz`, or one of
/// [`IMAGE_EXTS`]. The comparison is case-sensitive, and a path with no
/// extension (or one that is not valid UTF-8) is rejected.
fn is_supported(p: &Path) -> bool {
    let Some(ext) = p.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    ext == "pdf" || ext == "gz" || IMAGE_EXTS.contains(&ext)
}
22
23fn find_pdfs(dir: &Path, out: &mut Vec<PathBuf>) {
24 let Ok(entries) = std::fs::read_dir(dir) else {
25 return;
26 };
27 let mut entries: Vec<_> = entries.flatten().map(|e| e.path()).collect();
28 entries.sort();
29 for p in entries {
30 if p.is_dir() {
31 if p.file_name().is_some_and(|n| n == "large") {
33 continue;
34 }
35 find_pdfs(&p, out);
36 } else if is_supported(&p) {
37 out.push(p);
38 }
39 }
40}
41
42fn main() {
43 let mut args = std::env::args().skip(1);
44 let root = PathBuf::from(args.next().expect("usage: snapshot <root> <outdir>"));
45 let outdir = PathBuf::from(args.next().expect("usage: snapshot <root> <outdir>"));
46
47 let mut pdfs = Vec::new();
48 find_pdfs(&root, &mut pdfs);
49
50 let mut pipeline = Pipeline::new().expect("load pipeline");
51 let (mut ok, mut err) = (0u32, 0u32);
52 for pdf in &pdfs {
53 let rel = pdf.strip_prefix(&root).unwrap_or(pdf);
54 let name = pdf.file_name().unwrap().to_string_lossy().to_string();
55 let md = match std::fs::read(pdf)
56 .map_err(|e| format!("read: {e}"))
57 .and_then(|bytes| {
58 let ext = pdf.extension().and_then(|e| e.to_str()).unwrap_or("");
59 let result = if ext == "gz" {
60 fleischwolf_pdf::convert_mets_gbs(&bytes, &name)
61 } else if IMAGE_EXTS.contains(&ext) {
62 pipeline.convert_image(&bytes, &name)
63 } else {
64 pipeline.convert(&bytes, None, &name)
65 };
66 result
67 .map(|d| d.export_to_markdown())
68 .map_err(|e| e.to_string())
69 }) {
70 Ok(md) => {
71 ok += 1;
72 md
73 }
74 Err(e) => {
75 err += 1;
76 eprintln!("ERR {}: {e}", rel.display());
77 format!("ERROR: {e}\n")
78 }
79 };
80 let mut dest = outdir.join(rel).into_os_string();
82 dest.push(".md");
83 let dest = PathBuf::from(dest);
84 std::fs::create_dir_all(dest.parent().unwrap()).expect("mkdir");
85 std::fs::write(&dest, md).expect("write snapshot");
86 }
87 eprintln!("snapshots: {} ok, {} error, {} total", ok, err, pdfs.len());
88}