use std::collections::HashMap;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::sync::{Arc, OnceLock};
use hayro_write::{ExtractionError, ExtractionQuery};
use pdf_writer::Ref;
pub use hayro_write::{Page, Pdf};
use crate::chunk_container::EmbeddedPdfChunk;
use crate::configure::{PdfVersion, ValidationError};
use crate::error::{KrillaError, KrillaResult};
use crate::serialize::SerializeContext;
use crate::surface::Location;
use crate::util::{Deferred, Prehashed};
/// An error that can occur when embedding an external PDF document.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum PdfError {
/// The requested page index does not exist in the source document.
InvalidPage(usize),
/// The embedded document requires a newer PDF version than the one
/// the output document is being written with.
VersionMismatch(PdfVersion),
}
/// Internal representation of an embeddable PDF: a shared handle to the parsed document.
struct PdfDocumentRepr(Arc<Pdf>);
impl Debug for PdfDocumentRepr {
    /// Opaque debug representation; dumping the raw PDF bytes would be noise.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str("PdfDocumentRepr { .. }")
    }
}
impl Hash for PdfDocumentRepr {
fn hash<H: Hasher>(&self, state: &mut H) {
// Hash the underlying raw PDF bytes, so two handles over identical
// data hash identically. NOTE(review): the double `as_ref()` assumes
// `Pdf::data()` returns a wrapper that derefs down to `&[u8]` —
// confirm against hayro_write's API.
self.0.data().as_ref().as_ref().hash(state);
}
}
/// An external PDF document that can be embedded into the output document.
///
/// Cheap to clone (the payload is behind an `Arc`); hashing goes through the
/// `Prehashed` wrapper rather than re-reading the document bytes each time.
#[derive(Clone, Hash, PartialEq, Eq, Debug)]
pub struct PdfDocument(Arc<Prehashed<PdfDocumentRepr>>);
impl PdfDocument {
    /// Wraps an existing parsed PDF so it can be embedded.
    pub fn new(pdf: Arc<Pdf>) -> PdfDocument {
        let repr = PdfDocumentRepr(pdf);
        Self(Arc::new(Prehashed::new(repr)))
    }

    /// Borrows the underlying parsed PDF.
    pub(crate) fn pdf(&self) -> &Pdf {
        &(*self.0).0
    }

    /// Borrows the pages of the underlying PDF.
    pub(crate) fn pages(&self) -> &[Page<'_>] {
        self.pdf().pages()
    }
}
/// Per-document bookkeeping accumulated while pages/XObjects are registered.
#[derive(Default, Debug)]
pub(crate) struct PdfDocumentInfo {
// Insertion-order index of the document; used to serialize embedded
// documents in the order they were first registered.
counter: u64,
// The refs under which each extraction query's result must be serialized
// (parallel to `queries` and `locations`).
query_refs: Vec<Ref>,
// Page index -> ref of an already-registered XObject, so repeated
// requests for the same page reuse one ref.
cached_xobjects: HashMap<usize, Ref>,
// The extraction queries to hand to hayro_write.
queries: Vec<ExtractionQuery>,
// One optional source location per query, for error reporting.
locations: Vec<Option<Location>>,
}
impl PdfDocumentInfo {
pub(crate) fn new(counter: u64) -> Self {
Self {
counter,
..Self::default()
}
}
}
/// Collects all PDF documents that need to be embedded during serialization.
#[derive(Default, Debug)]
pub(crate) struct PdfSerializerContext {
// Bookkeeping for each registered document.
infos: HashMap<PdfDocument, PdfDocumentInfo>,
// Next insertion-order index handed out to a newly seen document.
counter: u64,
}
impl PdfSerializerContext {
    /// Registers page `page_index` of `document` for embedding as a full page.
    ///
    /// The extracted page will be serialized under `ref_`; `location` is kept
    /// for error and validation reporting.
    pub(crate) fn add_page(
        &mut self,
        document: &PdfDocument,
        page_index: usize,
        ref_: Ref,
        location: Option<Location>,
    ) {
        let info = self.get_info(document);
        info.query_refs.push(ref_);
        info.queries.push(ExtractionQuery::new_page(page_index));
        info.locations.push(location);
    }

    /// Returns the bookkeeping entry for `document`, creating it on first use.
    ///
    /// Each newly seen document receives a monotonically increasing counter so
    /// that `serialize` can process documents in registration order.
    fn get_info(&mut self, document: &PdfDocument) -> &mut PdfDocumentInfo {
        self.infos.entry(document.clone()).or_insert_with(|| {
            let info = PdfDocumentInfo::new(self.counter);
            self.counter += 1;
            info
        })
    }

    /// Registers page `page_index` of `document` for embedding as an XObject.
    ///
    /// Results are cached per page: if the page was already requested, the
    /// previously assigned ref is returned and `ref_` is ignored — hence the
    /// `#[must_use]` below.
    #[must_use = "This method might not use the original ref and could return a different one that \
    was previously cached."]
    pub(crate) fn add_xobject(
        &mut self,
        document: &PdfDocument,
        page_index: usize,
        ref_: Ref,
        location: Option<Location>,
    ) -> Ref {
        let info = self.get_info(document);
        if let Some(cached_ref) = info.cached_xobjects.get(&page_index) {
            return *cached_ref;
        }
        info.query_refs.push(ref_);
        info.queries.push(ExtractionQuery::new_xobject(page_index));
        info.locations.push(location);
        info.cached_xobjects.insert(page_index, ref_);
        ref_
    }

    /// Extracts all registered documents and queues one deferred chunk per
    /// document on the chunk container.
    ///
    /// Returns `Ok(())` immediately; extraction itself (and its error
    /// reporting via `KrillaError::Pdf`) happens inside the deferred closures.
    pub(crate) fn serialize(self, sc: &mut SerializeContext) -> KrillaResult<()> {
        let page_tree_parent_ref = sc.page_tree_ref();
        let krilla_version = sc.serialize_settings().configuration.version();

        // Process documents in the order they were first registered.
        let mut entries = self.infos.into_iter().collect::<Vec<_>>();
        entries.sort_unstable_by_key(|(_, info)| info.counter);

        for (doc, info) in entries {
            // Embedded PDFs are a validation concern; report one error per
            // registered query so each carries its own location.
            for location in &info.locations {
                sc.register_validation_error(ValidationError::EmbeddedPDF(*location))
            }

            let container = &mut sc.chunk_container;
            // Defer the (potentially expensive) extraction work.
            let deferred_chunk = Deferred::new(move || {
                let mut new_ref = Ref::new(1);
                // Fall back to the first known location for document-level errors.
                let first_location = info.locations.iter().flatten().next().cloned();

                let pdf = doc.pdf();
                let pdf_version = convert_pdf_version(pdf.version());
                // Refuse to embed a document requiring a newer PDF version
                // than the output document is written with.
                if krilla_version < pdf_version {
                    return Err(KrillaError::Pdf(
                        doc.clone(),
                        PdfError::VersionMismatch(pdf_version),
                        first_location,
                    ));
                }

                let extracted =
                    hayro_write::extract(pdf, Box::new(|| new_ref.bump()), &info.queries);
                let result = convert_extraction_result(extracted, &doc, first_location.as_ref())?;
                debug_assert_eq!(info.query_refs.len(), result.root_refs.len());

                // Map the refs hayro assigned to the refs that were reserved
                // up-front in the krilla document.
                let mut root_ref_mappings = HashMap::new();
                root_ref_mappings.insert(result.page_tree_parent_ref, page_tree_parent_ref);

                for ((should_ref, extraction_result), location) in info
                    .query_refs
                    .iter()
                    .zip(result.root_refs)
                    .zip(&info.locations)
                {
                    // `location` is `&Option<Location>`; `as_ref` yields
                    // `Option<&Location>` without cloning the location.
                    let assigned_ref =
                        convert_extraction_result(extraction_result, &doc, location.as_ref())?;
                    root_ref_mappings.insert(assigned_ref, *should_ref);
                }

                Ok(EmbeddedPdfChunk {
                    root_ref_mappings,
                    original_chunk: result.chunk,
                    new_chunk: OnceLock::new(),
                })
            });

            container.embedded_pdfs.push(deferred_chunk);
        }

        Ok(())
    }
}
fn convert_pdf_version(version: hayro_write::PdfVersion) -> PdfVersion {
match version {
hayro_write::PdfVersion::Pdf10 => PdfVersion::Pdf14,
hayro_write::PdfVersion::Pdf11 => PdfVersion::Pdf14,
hayro_write::PdfVersion::Pdf12 => PdfVersion::Pdf14,
hayro_write::PdfVersion::Pdf13 => PdfVersion::Pdf14,
hayro_write::PdfVersion::Pdf14 => PdfVersion::Pdf14,
hayro_write::PdfVersion::Pdf15 => PdfVersion::Pdf15,
hayro_write::PdfVersion::Pdf16 => PdfVersion::Pdf16,
hayro_write::PdfVersion::Pdf17 => PdfVersion::Pdf17,
hayro_write::PdfVersion::Pdf20 => PdfVersion::Pdf20,
}
}
/// Converts a hayro extraction result into a `KrillaResult`, attaching the
/// source document and an optional location to any error.
fn convert_extraction_result<T>(
    result: Result<T, ExtractionError>,
    doc: &PdfDocument,
    location: Option<&Location>,
) -> KrillaResult<T> {
    match result {
        Ok(value) => Ok(value),
        Err(ExtractionError::InvalidPageIndex(index)) => Err(KrillaError::Pdf(
            doc.clone(),
            PdfError::InvalidPage(index),
            location.cloned(),
        )),
    }
}