code-to-pdf 0.2.3

Generates a syntax-highlighted PDF of your source code
Documentation
//! Decodes a PDF generated by `c2pdf` and generates the corresponding folders and files
use std::{fs, iter::Peekable, mem, path::PathBuf};

use argh::FromArgs;
use printpdf::{self, Op, PdfPage, TextItem};
/// The lines of text collected for one text position on a page.
type Section = Vec<String>;
/// Every section of a single page, in the order they were encountered.
type Sections = Vec<Section>;
/// Groups the text operations of `page` into sections of lines.
///
/// A new section begins whenever the text position is set (`SetTextMatrix` or
/// `SetTextCursor`). Inside a section, text items are appended to a pending line
/// and an `AddLineBreak` normally terminates that line with `'\n'`.
///
/// Text items have leading/trailing `'\r'` and `'\n'` stripped. When anything was
/// stripped, the next `AddLineBreak` is ignored, so the pending line keeps
/// accumulating text; the break after that one behaves normally again.
///
/// Returns the non-empty sections in page order. Empty sections are never emitted.
fn parse_sections(page: &PdfPage) -> Sections {
  /// Moves the pending line into `section`, unless the line is empty.
  fn flush_line(line: &mut String, section: &mut Vec<String>) {
    if !line.is_empty() {
      section.push(mem::take(line));
    }
  }

  /// Moves the pending section into `finished`, unless the section is empty.
  fn flush_section(section: &mut Vec<String>, finished: &mut Vec<Vec<String>>) {
    if !section.is_empty() {
      finished.push(mem::take(section));
    }
  }

  let mut finished: Vec<Vec<String>> = Vec::new();
  let mut section: Vec<String> = Vec::new();
  let mut line = String::new();
  // Whether the next `AddLineBreak` should terminate the pending line
  let mut break_ends_line = true;

  for op in &page.ops {
    match op {
      // Repositioning the text closes the pending line and the pending section
      Op::SetTextMatrix { matrix: _ } | Op::SetTextCursor { pos: _ } => {
        flush_line(&mut line, &mut section);
        flush_section(&mut section, &mut finished);
      }
      Op::WriteText { items, font: _ } => {
        let texts = items.iter().filter_map(|item| match item {
          TextItem::Text(text) => Some(text),
          _ => None,
        });
        for text in texts {
          let stripped = text.trim_matches(['\r', '\n']);
          line.push_str(stripped);
          // The text carried its own line ending, so skip the upcoming break
          if stripped.len() != text.len() {
            break_ends_line = false;
          }
        }
      }
      Op::AddLineBreak => {
        if break_ends_line {
          // Unlike `flush_line`, an empty line is kept here (it becomes "\n")
          line.push('\n');
          section.push(mem::take(&mut line));
        }
        break_ends_line = true;
      }
      _ => {}
    }
  }

  // Whatever is still pending at the end of the page forms the final section
  flush_line(&mut line, &mut section);
  flush_section(&mut section, &mut finished);
  finished
}
/// The data decoded from the sections of a single PDF page.
#[derive(Debug)]
struct PageData {
  /// Path of the file this page belongs to (first section, whitespace-trimmed).
  path: PathBuf,
  /// Text of the middle section when the page has three sections, otherwise `None`.
  custom_text: Option<String>,
  /// File contents carried by the page (last section, joined verbatim).
  contents: String,
}
impl PageData {
  /// Joins the lines of the path section and trims surrounding whitespace.
  fn parse_path(path_section: &[String]) -> PathBuf {
    PathBuf::from(path_section.concat().trim())
  }

  /// Joins the lines of the custom-text section and trims surrounding whitespace.
  // NOTE(review): assumes surrounding whitespace in the custom text is not
  // significant, mirroring how the path section is treated - confirm.
  fn parse_custom_text(custom_section: &[String]) -> String {
    custom_section.concat().trim().to_owned()
  }

  /// Joins the lines of the contents section without altering them.
  fn parse_contents(contents_section: &[String]) -> String {
    contents_section.concat()
  }

  /// Builds a `PageData` from the sections of one page.
  ///
  /// * Two sections are read as `[path, contents]`.
  /// * Three sections are read as `[path, custom text, contents]`; the middle
  ///   section is stored in `custom_text` instead of being thrown away.
  ///
  /// # Errors
  ///
  /// Returns a message stating the section count when it is neither 2 nor 3.
  pub fn parse_from_sections(sections: Vec<Vec<String>>) -> Result<Self, String> {
    let (path_section, custom_section, contents_section) = match sections.as_slice() {
      // No custom text
      [path, contents] => (path, None, contents),
      // Custom text sits between the path and the contents
      [path, custom, contents] => (path, Some(custom), contents),
      _ => {
        return Err(format!(
          "Malformed Page. Sections length should be 2 or 3, but it was {}",
          sections.len()
        ))
      }
    };

    Ok(Self {
      path: Self::parse_path(path_section),
      custom_text: custom_section.map(|section| Self::parse_custom_text(section)),
      contents: Self::parse_contents(contents_section),
    })
  }
}
/// A file to be written out: a path plus the text gathered for it.
#[derive(Debug)]
struct FileData {
  /// Path of the file as decoded from the PDF
  path: PathBuf,
  /// Full text of the file
  contents: String,
}
/// Pulls the next file out of a stream of parsed pages.
///
/// The first page supplies the path and the initial contents. Every directly
/// following page that parsed successfully and carries the same path is consumed
/// too, and its contents followed by a `'\n'` are appended.
///
/// Returns `None` once `pages` is exhausted and `Some(Err(_))` when the first page
/// taken failed to parse. A failed page that follows a good one is left in the
/// iterator for the next call.
fn next_file_data<I: Iterator<Item = Result<PageData, String>>>(
  pages: &mut Peekable<I>,
) -> Option<Result<FileData, String>> {
  let first = match pages.next()? {
    Ok(page) => page,
    Err(message) => return Some(Err(message)),
  };
  let path = first.path;
  let mut contents = first.contents;

  // `next_if` only advances the iterator while the upcoming page continues this file
  while let Some(Ok(continuation)) =
    pages.next_if(|upcoming| matches!(upcoming, Ok(page) if page.path == path))
  {
    contents.push_str(&continuation.contents);
    contents.push('\n');
  }

  Some(Ok(FileData { path, contents }))
}
// Command-line arguments, parsed by argh through the `FromArgs` derive.
// The `///` comments below are what argh shows as help text, so rewording them
// changes the program's output.
#[derive(FromArgs)]
/// Decodes a PDF generated via `c2pdf` into the original directory tree
struct Arguments {
  // Positional argument: the input PDF
  /// path of the PDF to decode
  #[argh(positional)]
  pdf_path: PathBuf,

  // Named option; falls back to `./generated` when not supplied
  /// directory to write decoded data to
  #[argh(option, default = "PathBuf::from(\"./generated\")")]
  out_dir: PathBuf,
}
fn main() {
  let args: Arguments = argh::from_env();
  let Arguments { pdf_path, out_dir } = args;
  let pdf_bytes = fs::read(pdf_path).unwrap();
  let doc = printpdf::PdfDocument::parse(&pdf_bytes, &Default::default(), &mut vec![]).unwrap();
  let mut pages_iterator = doc
    .pages
    .iter()
    .map(|p| PageData::parse_from_sections(parse_sections(p)))
    .peekable();
  while let Some(file_data_result) = next_file_data(&mut pages_iterator) {
    let file_data = match file_data_result {
      Ok(data) => data,
      Err(err) => {
        eprintln!("{}", err);
        continue;
      }
    };

    let base = &out_dir;
    let file_path = base.join(file_data.path);
    fs::create_dir_all(file_path.parent().unwrap()).unwrap();

    fs::write(file_path, file_data.contents).unwrap();
  }
}