katana-canvas-forge 0.1.7

Versioned diagram rendering and document export runtime for KatanA (Mermaid, Draw.io, HTML/PDF/PNG/JPEG).
Documentation
use super::types::{
    ExportError, ExportFormat, ExportInput, ExportOutput, ExporterTrait, PdfExporter,
};
use super::{native_document::NativeHtmlDocument, native_document_image::NativeDocumentImage};
use crate::markdown::MarkdownError;

/// Factor applied to the rendered image's pixel width and height to obtain the
/// PDF page width and the height at which the image is drawn.
/// NOTE(review): 0.75 equals 72/96, i.e. it looks like a CSS-pixel to PDF-point
/// conversion — confirm that this is the intent.
const IMAGE_SCALE_RATIO: f32 = 0.75;
/// Page height divided by page width (11 / 8.5, the Letter proportions named by
/// the constant). The page height is derived from the page width with this ratio.
const LETTER_PAGE_HEIGHT_RATIO: f32 = 11.0 / 8.5;
/// Base added to a page index to get that page's object number, and to the page
/// count to get the image object's number. Objects 1 and 2 are the catalog and
/// the `/Pages` node, so despite the name this is the number of the *first page
/// object*, not of the page tree itself.
const PDF_PAGE_TREE_OBJECT_ID: usize = 3;

impl PdfExporter {
    /// Reports whether PDF export can be used.
    ///
    /// Always `true`: this implementation returns the constant unconditionally.
    pub fn is_available() -> bool {
        true
    }

    /// Renders `html` to a PDF and writes it to `output`.
    ///
    /// The HTML is parsed, rendered to an image, wrapped in a
    /// `NativePdfDocument`, serialized, and written to disk in one call.
    ///
    /// # Errors
    ///
    /// Propagates any error from parsing, rendering, or PDF construction.
    /// A failure to write the file is reported as
    /// `MarkdownError::ExportFailed` carrying the I/O error's message.
    fn export_file(html: &str, output: &std::path::Path) -> Result<(), MarkdownError> {
        let parsed = NativeHtmlDocument::parse(html)?;
        let rendered = parsed.render_image()?;
        let bytes = NativePdfDocument::new(&rendered)?.to_bytes();

        match std::fs::write(output, bytes) {
            Ok(()) => Ok(()),
            Err(io_error) => Err(MarkdownError::ExportFailed(io_error.to_string())),
        }
    }
}

/// The formats this exporter handles (PDF only); returned by
/// `PdfExporter::supported_formats`.
static PDF_FORMATS: &[ExportFormat] = &[ExportFormat::Pdf];

impl ExporterTrait for PdfExporter {
    /// Exports `input.html_source` as a PDF at `input.output_path`.
    ///
    /// # Errors
    ///
    /// * `ExportError::UnsupportedFormat` when `input.format` is not
    ///   `ExportFormat::Pdf`.
    /// * `ExportError::RenderFailed` with the underlying error's message when
    ///   rendering or writing the file fails.
    fn export(&self, input: &ExportInput) -> Result<ExportOutput, ExportError> {
        // Guard clause: this exporter only knows how to produce PDFs.
        if input.format != ExportFormat::Pdf {
            return Err(ExportError::UnsupportedFormat);
        }

        match Self::export_file(&input.html_source, &input.output_path) {
            Ok(()) => Ok(ExportOutput {
                output_path: input.output_path.clone(),
                format: ExportFormat::Pdf,
            }),
            Err(failure) => Err(ExportError::RenderFailed(failure.to_string())),
        }
    }

    /// Lists the formats accepted by [`Self::export`].
    fn supported_formats(&self) -> &[ExportFormat] {
        PDF_FORMATS
    }
}

/// In-memory description of a PDF that shows one tall JPEG image sliced
/// across fixed-size pages.
struct NativePdfDocument {
    /// JPEG-encoded image data embedded as the single image object.
    jpeg: Vec<u8>,
    /// Pixel width of the image, written to the image object's `/Width`.
    image_width: u32,
    /// Pixel height of the image, written to the image object's `/Height`.
    image_height: u32,
    /// Width of every page, in the units used for the page `/MediaBox`.
    page_width: f32,
    /// Height of every page, in the units used for the page `/MediaBox`.
    page_height: f32,
    /// Height at which the whole image is drawn, in the same units as the
    /// page size; it may span several pages.
    image_display_height: f32,
}

impl NativePdfDocument {
    /// Builds the PDF model for a rendered document image.
    ///
    /// The page width is the image width scaled by `IMAGE_SCALE_RATIO`, the
    /// page height is that width times `LETTER_PAGE_HEIGHT_RATIO`, and the
    /// image is JPEG-encoded once so every page can reference the same data.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by the image's `jpeg_bytes` call.
    fn new(image: &NativeDocumentImage) -> Result<Self, MarkdownError> {
        let page_width = image.width as f32 * IMAGE_SCALE_RATIO;
        Ok(Self {
            page_width,
            page_height: page_width * LETTER_PAGE_HEIGHT_RATIO,
            image_display_height: image.height as f32 * IMAGE_SCALE_RATIO,
            jpeg: image.jpeg_bytes()?,
            image_width: image.width,
            image_height: image.height,
        })
    }

    /// Serializes the document as a complete PDF 1.4 file.
    ///
    /// Layout: header, every indirect object (numbered from 1 in the order
    /// returned by [`Self::objects`]), the cross-reference table, and the
    /// trailer pointing at object 1 as the root.
    fn to_bytes(&self) -> Vec<u8> {
        let objects = self.objects();
        let mut output = b"%PDF-1.4\n".to_vec();
        // Byte offset of each object from the start of the file; the xref
        // table needs these.
        let mut offsets = Vec::with_capacity(objects.len());
        for (index, object) in objects.iter().enumerate() {
            offsets.push(output.len());
            output.extend_from_slice(format!("{} 0 obj\n", index + 1).as_bytes());
            output.extend_from_slice(object);
            output.extend_from_slice(b"\nendobj\n");
        }
        let xref_offset = output.len();
        // `+ 1` accounts for the mandatory free entry for object 0.
        output.extend_from_slice(format!("xref\n0 {}\n", objects.len() + 1).as_bytes());
        output.extend_from_slice(b"0000000000 65535 f \n");
        for offset in offsets {
            // Each entry is exactly 20 bytes: 10-digit offset, 5-digit
            // generation, `n`, and a two-byte end of line (space + LF).
            output.extend_from_slice(format!("{offset:010} 00000 n \n").as_bytes());
        }
        output.extend_from_slice(
            format!(
                "trailer\n<< /Size {} /Root 1 0 R >>\nstartxref\n{xref_offset}\n%%EOF\n",
                objects.len() + 1
            )
            .as_bytes(),
        );
        output
    }

    /// Returns the bodies of all indirect objects in object-number order.
    ///
    /// With `n` pages the numbering is: 1 catalog, 2 page tree, `3..3 + n`
    /// page objects, `3 + n` the shared image, then `n` content streams.
    fn objects(&self) -> Vec<Vec<u8>> {
        let page_count = self.page_count();
        let image_object_id = Self::image_object_id(page_count);
        let mut objects = vec![
            b"<< /Type /Catalog /Pages 2 0 R >>".to_vec(),
            self.pages_object(page_count),
        ];
        objects.extend(
            (0..page_count)
                .map(|page_index| self.page_object(page_count, page_index, image_object_id)),
        );
        objects.push(self.image_object());
        objects.extend((0..page_count).map(|page_index| self.content_object(page_index)));
        objects
    }

    /// Number of pages needed to show the whole image; always at least 1.
    ///
    /// A zero `page_height` (zero-width image) makes the ratio infinite or
    /// NaN. An infinite value would saturate to `usize::MAX` when cast and
    /// make the callers try to build that many pages, so any non-finite ratio
    /// is treated as a single page.
    fn page_count(&self) -> usize {
        let pages = (self.image_display_height / self.page_height).ceil();
        if pages.is_finite() {
            pages.max(1.0) as usize
        } else {
            1
        }
    }

    /// Builds the `/Pages` node that lists every page object as a kid.
    fn pages_object(&self, page_count: usize) -> Vec<u8> {
        let kids = (0..page_count)
            .map(|page_index| format!("{} 0 R", Self::page_object_id(page_index)))
            .collect::<Vec<_>>()
            .join(" ");
        format!("<< /Type /Pages /Kids [{kids}] /Count {page_count} >>").into_bytes()
    }

    /// Builds the page object for `page_index`.
    ///
    /// Every page has the same `/MediaBox`, references the shared image as
    /// `/Im1`, and points at its own content stream.
    fn page_object(&self, page_count: usize, page_index: usize, image_object_id: usize) -> Vec<u8> {
        let content_object_id = Self::content_object_id(page_count, page_index);
        format!(
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 {:.2} {:.2}] /Resources << /XObject << /Im1 {} 0 R >> >> /Contents {} 0 R >>",
            self.page_width, self.page_height, image_object_id, content_object_id
        )
        .into_bytes()
    }

    /// Builds the image XObject: a dictionary followed by the raw JPEG data
    /// as a `/DCTDecode` stream.
    fn image_object(&self) -> Vec<u8> {
        let mut object = format!(
            "<< /Type /XObject /Subtype /Image /Width {} /Height {} /ColorSpace /DeviceRGB /BitsPerComponent 8 /Filter /DCTDecode /Length {} >>\nstream\n",
            self.image_width,
            self.image_height,
            self.jpeg.len()
        )
        .into_bytes();
        object.extend_from_slice(&self.jpeg);
        object.extend_from_slice(b"\nendstream");
        object
    }

    /// Builds the content stream that paints page `page_index`.
    ///
    /// The stream clips to the page rectangle, then draws the whole image
    /// scaled to `page_width` x `image_display_height`, shifted vertically so
    /// that only this page's slice falls inside the clip.
    fn content_object(&self, page_index: usize) -> Vec<u8> {
        // Y of the image's bottom edge. For page 0 the image top sits at the
        // page top; each later page moves the image up by one page height.
        let image_y =
            self.page_height - self.image_display_height + self.page_height * page_index as f32;
        let commands = format!(
            "q\n0 0 {:.2} {:.2} re\nW\nn\n{:.2} 0 0 {:.2} 0 {:.2} cm\n/Im1 Do\nQ\n",
            self.page_width, self.page_height, self.page_width, self.image_display_height, image_y
        );
        // The trailing newline of `commands` is counted in `/Length` and is
        // the only separator before `endstream`.
        let mut object = format!("<< /Length {} >>\nstream\n", commands.len()).into_bytes();
        object.extend_from_slice(commands.as_bytes());
        object.extend_from_slice(b"endstream");
        object
    }

    /// Object number of the page object for `page_index`.
    fn page_object_id(page_index: usize) -> usize {
        page_index + PDF_PAGE_TREE_OBJECT_ID
    }

    /// Object number of the shared image, which follows the last page object.
    fn image_object_id(page_count: usize) -> usize {
        page_count + PDF_PAGE_TREE_OBJECT_ID
    }

    /// Object number of the content stream for `page_index`; content streams
    /// follow the image object.
    fn content_object_id(page_count: usize, page_index: usize) -> usize {
        Self::image_object_id(page_count) + page_index + 1
    }
}

// Unit tests for this module are kept in the separate file `pdf_tests.rs`
// and are only compiled in test builds.
#[cfg(test)]
#[path = "pdf_tests.rs"]
mod tests;