use crate::error::{
GenerationError, GenerationErrorKind, IoError, IoErrorKind, KrikError, KrikResult,
};
use crate::parser::Document;
use crate::site::SiteConfig;
use chrono::Utc;
use rayon::prelude::*;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Mutex;
use tracing::{debug, info, warn};
use which::which;
use crate::I18nManager;
/// Converts markdown documents to PDF by invoking the external `pandoc`
/// executable with `typst` as the PDF engine.
///
/// Both paths are looked up on `PATH` when the generator is constructed and
/// are `None` when the corresponding executable could not be found.
pub struct PdfGenerator {
// Location of the `pandoc` executable, if it was found on `PATH`.
pandoc_path: Option<PathBuf>,
// Location of the `typst` executable, if it was found on `PATH`.
typst_path: Option<PathBuf>,
}
impl PdfGenerator {
/// Creates a generator, looking up `pandoc` and `typst` on `PATH`.
///
/// A missing tool is not an error: a warning is logged and the matching
/// path is stored as `None`, so construction always succeeds. The paths that
/// were found are logged at debug level.
///
/// # Errors
///
/// Currently never returns an error; the `KrikResult` return type is kept
/// for interface compatibility.
pub fn new() -> KrikResult<Self> {
    let pandoc_path = Self::find_executable("pandoc");
    // Only log the path when there is one; unwrapping here would panic in
    // exactly the "tool not installed" case this constructor tolerates.
    match &pandoc_path {
        Some(path) => debug!("Pandoc path: {}", path.display()),
        None => warn!("Pandoc not found in PATH. Install pandoc to enable PDF generation."),
    }

    let typst_path = Self::find_executable("typst");
    match &typst_path {
        Some(path) => debug!("Typst path: {}", path.display()),
        None => warn!("Typst not found in PATH. Install typst to enable PDF generation."),
    }

    Ok(Self {
        pandoc_path,
        typst_path,
    })
}
pub fn is_available() -> bool {
Self::find_executable("pandoc").is_some() && Self::find_executable("typst").is_some()
}
/// Looks up `name` on `PATH`, returning its location or `None` when the
/// lookup fails for any reason.
fn find_executable(name: &str) -> Option<PathBuf> {
    match which(name) {
        Ok(location) => Some(location),
        Err(_) => None,
    }
}
/// Renders a single markdown file to PDF with pandoc (typst engine).
///
/// The markdown is first rewritten into a temporary file (title heading,
/// resolved image paths, optional footer) and pandoc is then run on that
/// file with `source_root` as its working directory.
///
/// If pandoc was not found when the generator was created, a warning is
/// logged and `Ok(())` is returned without producing any output.
///
/// # Errors
///
/// Returns an error when the output directory cannot be created, when the
/// temporary markdown file cannot be prepared, when pandoc cannot be
/// started, or when pandoc exits with a non-zero status (its stderr is
/// included in the error message).
pub fn generate_pdf_from_file(
    &self,
    input_path: &Path,
    output_path: &Path,
    source_root: &Path,
    site_config: &SiteConfig,
    document_language: &str,
) -> KrikResult<()> {
    let pandoc = match self.pandoc_path.as_deref() {
        Some(path) => path,
        None => {
            warn!("Pandoc not found in PATH. Install pandoc to enable PDF generation.");
            return Ok(());
        }
    };

    if let Some(parent) = output_path.parent() {
        fs::create_dir_all(parent).map_err(|e| {
            KrikError::Io(Box::new(IoError {
                kind: IoErrorKind::WriteFailed(e),
                path: parent.to_path_buf(),
                context: "Creating PDF output directory".to_string(),
            }))
        })?;
    }

    let temp_md_file = self.create_filtered_markdown(
        input_path,
        output_path,
        source_root,
        site_config,
        document_language,
    )?;

    let pandoc_result = Command::new(pandoc)
        .arg(&temp_md_file)
        .arg("--from=gfm")
        .arg("--pdf-engine=typst")
        .arg("--output")
        .arg(output_path)
        .arg("--standalone")
        .current_dir(source_root)
        .output();

    // Remove the temporary file before inspecting the result so it is
    // cleaned up on the failure paths too. Removal is best-effort: a
    // leftover temp file must not turn a successful conversion into an error.
    let _ = fs::remove_file(&temp_md_file);

    let output = pandoc_result.map_err(|e| {
        KrikError::Generation(Box::new(GenerationError {
            kind: GenerationErrorKind::FeedError(format!("Failed to execute pandoc: {e}")),
            context: "Running pandoc to generate PDF".to_string(),
        }))
    })?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(KrikError::Generation(Box::new(GenerationError {
            kind: GenerationErrorKind::FeedError(format!("Pandoc failed: {stderr}")),
            context: "Converting markdown to PDF with pandoc".to_string(),
        })));
    }

    Ok(())
}
/// Writes a pandoc-ready copy of `input_path` to a uniquely named temporary
/// file and returns that file's path.
///
/// The copy has its front matter removed, relative image paths resolved,
/// a `# title` heading prepended when the front matter has a title and the
/// body does not already start with a level-one heading, and, when the site
/// has a base URL, a translated footer with the PDF URL and a UTC timestamp.
///
/// # Errors
///
/// Fails when the input cannot be read, when front matter parsing or image
/// path rewriting fails, or when the temporary file cannot be written.
fn create_filtered_markdown(
    &self,
    input_path: &Path,
    output_path: &Path,
    source_root: &Path,
    site_config: &SiteConfig,
    document_language: &str,
) -> KrikResult<PathBuf> {
    let raw = fs::read_to_string(input_path).map_err(|source| {
        KrikError::Io(Box::new(IoError {
            kind: IoErrorKind::ReadFailed(source),
            path: input_path.to_path_buf(),
            context: "Reading markdown file for PDF generation".to_string(),
        }))
    })?;

    let (front_matter, markdown_content) = self.parse_front_matter(&raw)?;
    let body = self.resolve_relative_image_paths(&markdown_content, input_path, source_root)?;

    let mut document = String::new();

    // Only synthesize a heading when the body does not bring its own.
    match &front_matter.title {
        Some(title) if !body.trim_start().starts_with("# ") => {
            document.push_str(&format!("# {title}\n\n"));
        }
        _ => {}
    }
    document.push_str(&body);

    if let Some(base_url) = site_config.get_base_url() {
        let pdf_url = self.generate_absolute_pdf_url(output_path, &base_url);
        let heading = I18nManager::translate_string("document_information", document_language);
        let downloaded_from =
            I18nManager::translate_string("document_downloaded_from", document_language);
        let generated_at = I18nManager::translate_string("generated_at", document_language);
        let timestamp = Utc::now().format("%Y-%m-%d %H:%M:%S UTC");

        let footer = [
            "\n\n---\n\n".to_string(),
            format!("## {heading}\n\n"),
            format!("{downloaded_from} {pdf_url}\n\n"),
            format!("{generated_at} {timestamp}\n"),
        ];
        document.push_str(&footer.concat());
    }

    // Process id plus a per-process counter keeps names distinct across the
    // calls made within this process.
    static SEQUENCE: AtomicU64 = AtomicU64::new(0);
    let sequence = SEQUENCE.fetch_add(1, Ordering::Relaxed);

    let stem = input_path
        .file_stem()
        .map(|s| s.to_string_lossy())
        .unwrap_or_default();
    let safe_stem = stem.replace(|c: char| !c.is_ascii_alphanumeric(), "_");
    let file_name = format!(
        "krik_pdf_{}_{}_{}.md",
        safe_stem,
        std::process::id(),
        sequence
    );
    let temp_file = std::env::temp_dir().join(file_name);

    match fs::write(&temp_file, document) {
        Ok(()) => Ok(temp_file),
        Err(source) => Err(KrikError::Io(Box::new(IoError {
            kind: IoErrorKind::WriteFailed(source),
            path: temp_file,
            context: "Writing temporary filtered markdown file".to_string(),
        }))),
    }
}
/// Splits `content` into its front matter and the remaining markdown by
/// delegating to `crate::parser::parse_markdown_with_frontmatter`.
///
/// # Errors
///
/// Propagates whatever error the parser reports.
fn parse_front_matter(
    &self,
    content: &str,
) -> KrikResult<(crate::parser::FrontMatter, String)> {
    let (front_matter, markdown_content) =
        crate::parser::parse_markdown_with_frontmatter(content)?;
    Ok((front_matter, markdown_content))
}
/// Builds the absolute URL of a generated PDF.
///
/// Trailing slashes are stripped from `base_url` and the site-relative path
/// computed by `generate_relative_pdf_path` (which always starts with `/`)
/// is appended.
pub fn generate_absolute_pdf_url(&self, output_path: &Path, base_url: &str) -> String {
    let mut url = String::from(base_url.trim_end_matches('/'));
    url.push_str(&self.generate_relative_pdf_path(output_path));
    url
}
/// Derives the site-relative URL path (always starting with `/`) of a PDF
/// from its location on disk.
///
/// * When the path contains a `_site` directory, every directory below the
///   last `_site` is kept, so `_site/a/b/x.pdf` becomes `/a/b/x.pdf`.
///   Previously only the immediate parent was kept, which produced wrong
///   URLs for documents nested more than one level deep.
/// * Otherwise only the immediate parent directory name is used
///   (`out/posts/x.pdf` becomes `/posts/x.pdf`), or just the file name when
///   the parent has no name.
/// * A path without a file name yields `/document.pdf`.
pub fn generate_relative_pdf_path(&self, output_path: &Path) -> String {
    let filename = match output_path.file_name() {
        Some(name) => name.to_string_lossy(),
        None => return "/document.pdf".to_string(),
    };
    let parent = match output_path.parent() {
        Some(parent) => parent,
        None => return format!("/{filename}"),
    };

    let dirs: Vec<std::path::Component> = parent.components().collect();
    if let Some(site_idx) = dirs.iter().rposition(|c| c.as_os_str() == "_site") {
        let below_site = &dirs[site_idx + 1..];
        // Only trust the tail when it consists of plain directory names; a
        // `..` in there would make a naive join point at the wrong place.
        if below_site
            .iter()
            .all(|c| matches!(c, std::path::Component::Normal(_)))
        {
            let mut url = String::new();
            for dir in below_site {
                url.push('/');
                url.push_str(&dir.as_os_str().to_string_lossy());
            }
            url.push('/');
            url.push_str(&filename);
            return url;
        }
    }

    match parent.file_name() {
        Some(dir) => format!("/{}/{}", dir.to_string_lossy(), filename),
        None => format!("/{filename}"),
    }
}
/// Rewrites relative image references in `content` so they resolve from
/// `source_root`, which is the directory pandoc is run in.
///
/// Each markdown image whose target is relative is re-emitted as an image
/// with the same alt text, the target passed through
/// `resolve_relative_path`, and the original title (if any) in double
/// quotes. Targets that are left untouched: `http://` / `https://` URLs,
/// `data:` URIs and paths starting with `/`.
///
/// # Errors
///
/// Returns an error if the image regex fails to compile.
fn resolve_relative_image_paths(
    &self,
    content: &str,
    input_path: &Path,
    source_root: &Path,
) -> KrikResult<String> {
    use regex::Regex;
    let img_regex =
        Regex::new(r#"!\[([^]]*)]\(([^)]+?)(?:\s+["']([^"']*?)["'])?\)"#).map_err(|e| {
            KrikError::Generation(Box::new(GenerationError {
                kind: GenerationErrorKind::FeedError(format!(
                    "Failed to compile image regex: {e}"
                )),
                context: "Processing markdown image paths".to_string(),
            }))
        })?;

    let input_dir = input_path.parent().unwrap_or(Path::new(""));

    // Build the result front to back: copy the untouched text between
    // rewritten images verbatim and splice in each replacement.
    let mut fixed_content = String::with_capacity(content.len());
    let mut copied_up_to = 0;

    for caps in img_regex.captures_iter(content) {
        let whole = match caps.get(0) {
            Some(m) => m,
            None => continue,
        };
        let original_path = caps.get(2).map_or("", |m| m.as_str());

        let leave_untouched = original_path.is_empty()
            || original_path.starts_with('/')
            || original_path.starts_with("http://")
            || original_path.starts_with("https://")
            || original_path.starts_with("data:");
        if leave_untouched {
            continue;
        }

        let alt_text = caps.get(1).map_or("", |m| m.as_str());
        let resolved_path = self.resolve_relative_path(original_path, input_dir, source_root);
        let replacement = match caps.get(3) {
            Some(title) => format!("![{alt_text}]({resolved_path} \"{}\")", title.as_str()),
            None => format!("![{alt_text}]({resolved_path})"),
        };

        fixed_content.push_str(&content[copied_up_to..whole.start()]);
        fixed_content.push_str(&replacement);
        copied_up_to = whole.end();
    }
    fixed_content.push_str(&content[copied_up_to..]);

    Ok(fixed_content)
}
/// Resolves `relative_path` (as written in a document located in
/// `input_dir`) into a forward-slash path string.
///
/// Both directories are canonicalized when possible and used as given
/// otherwise. If `input_dir` lies under `source_root`, the result is
/// relative to `source_root`; if not, the result is based on the
/// (canonicalized) `input_dir` itself. In both cases the joined path is
/// passed through `normalize_path` and backslashes are replaced with `/`.
pub fn resolve_relative_path(
    &self,
    relative_path: &str,
    input_dir: &Path,
    source_root: &Path,
) -> String {
    // Canonicalization fails for paths that do not exist; fall back to the
    // path exactly as it was supplied.
    let canonical_or_given =
        |dir: &Path| fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf());
    let input_abs = canonical_or_given(input_dir);
    let root_abs = canonical_or_given(source_root);

    let joined = match input_abs.strip_prefix(&root_abs) {
        Ok(within_root) => within_root.join(relative_path),
        Err(_) => input_abs.join(relative_path),
    };

    self.normalize_path(&joined)
        .to_string_lossy()
        .replace('\\', "/")
}
/// Lexically simplifies `path` without touching the filesystem.
///
/// `.` components are dropped and each `..` removes the component collected
/// before it via `PathBuf::pop`. A `..` with nothing left to remove is
/// discarded rather than kept, so `../a` normalizes to `a`. Root and prefix
/// components are carried over unchanged.
pub fn normalize_path(&self, path: &Path) -> PathBuf {
    use std::path::Component;

    path.components()
        .fold(PathBuf::new(), |mut normalized, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    normalized.pop();
                }
                Component::Normal(segment) => normalized.push(segment),
                other => normalized.push(other),
            }
            normalized
        })
}
/// Generates PDFs, in parallel, for every document whose front matter sets
/// `pdf: true`, and returns the sorted paths of the PDFs that were written.
///
/// Pandoc is run from the parent of the canonicalized `source_dir` (or from
/// `source_dir` itself when it has no parent). A failure for an individual
/// document is logged as a warning and does not abort the remaining
/// documents.
///
/// When pandoc is not available, a single warning is logged and an empty
/// list is returned, so callers never receive paths to files that were not
/// created.
///
/// # Errors
///
/// Returns an error only when `source_dir` cannot be canonicalized.
pub fn generate_pdfs(
    &self,
    documents: &[Document],
    source_dir: &Path,
    output_dir: &Path,
    site_config: &SiteConfig,
) -> KrikResult<Vec<PathBuf>> {
    let pdf_documents: Vec<&Document> = documents
        .iter()
        .filter(|doc| doc.front_matter.pdf.unwrap_or(false))
        .collect();
    if pdf_documents.is_empty() {
        info!("No documents marked for PDF generation (pdf: true)");
        return Ok(Vec::new());
    }

    // `generate_pdf_from_file` returns `Ok(())` without writing anything
    // when pandoc is missing. Without this guard every document would be
    // logged and returned as "generated" although no PDF exists.
    if self.pandoc_path.is_none() {
        warn!("Pandoc not found in PATH. Install pandoc to enable PDF generation.");
        return Ok(Vec::new());
    }

    info!(
        "Generating PDFs for {} documents marked with pdf: true",
        pdf_documents.len()
    );

    let canonical_source_dir = fs::canonicalize(source_dir).map_err(|e| {
        KrikError::Io(Box::new(IoError {
            kind: IoErrorKind::ReadFailed(e),
            path: source_dir.to_path_buf(),
            context: "Canonicalizing source directory path".to_string(),
        }))
    })?;
    let project_root = canonical_source_dir
        .parent()
        .unwrap_or(&canonical_source_dir)
        .to_path_buf();

    let results: Mutex<Vec<PathBuf>> = Mutex::new(Vec::with_capacity(pdf_documents.len()));
    pdf_documents.par_iter().for_each(|document| {
        let input_path = source_dir.join(&document.file_path);
        let output_path = self.determine_pdf_output_path(document, output_dir);
        match self.generate_pdf_from_file(
            &input_path,
            &output_path,
            &project_root,
            site_config,
            &document.language,
        ) {
            Ok(()) => {
                info!("Generated PDF: {}", output_path.display());
                if let Ok(mut guard) = results.lock() {
                    guard.push(output_path);
                }
            }
            Err(e) => {
                warn!(
                    "Warning: Failed to generate PDF for {}: {}",
                    document.file_path, e
                );
            }
        }
    });

    // Completion order of the parallel tasks is arbitrary; sort so the
    // returned list is deterministic.
    let mut generated = results.into_inner().unwrap_or_default();
    generated.sort();
    Ok(generated)
}
/// Maps a document to its PDF location: the document's `file_path` with the
/// extension replaced by `pdf`, joined onto `output_dir`.
fn determine_pdf_output_path(&self, document: &Document, output_dir: &Path) -> PathBuf {
    let pdf_relative = PathBuf::from(&document.file_path).with_extension("pdf");
    output_dir.join(pdf_relative)
}
pub fn version_info(&self) -> KrikResult<(String, String)> {
let pandoc_version = self.get_tool_version(&self.pandoc_path.clone().unwrap(), &["--version"])?;
let typst_version = self.get_tool_version(&self.typst_path.clone().unwrap(), &["--version"])?;
Ok((pandoc_version, typst_version))
}
/// Runs `tool_path` with `args` and returns the trimmed first line of its
/// standard output, or `"Unknown"` when the output is empty.
///
/// # Errors
///
/// Returns a generation error when the process cannot be started or exits
/// with a non-zero status.
fn get_tool_version(&self, tool_path: &Path, args: &[&str]) -> KrikResult<String> {
    // Both failure modes share the same error shape; only the message differs.
    let version_error = |message: String| {
        KrikError::Generation(Box::new(GenerationError {
            kind: GenerationErrorKind::FeedError(message),
            context: "Getting tool version information".to_string(),
        }))
    };

    let output = match Command::new(tool_path).args(args).output() {
        Ok(output) => output,
        Err(e) => {
            return Err(version_error(format!(
                "Failed to get version for {}: {}",
                tool_path.display(),
                e
            )));
        }
    };

    if !output.status.success() {
        return Err(version_error(format!(
            "Failed to get version for {}",
            tool_path.display()
        )));
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let first_line = stdout.lines().next().unwrap_or("Unknown").trim();
    Ok(first_line.to_string())
}
}