use std::cell::LazyCell;
use std::cmp::PartialEq;
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashMap};
use std::io::Write as _;
use pdf_writer::types::{ArtifactSubtype, RoleMapOpts, StructRole, StructRole2};
use pdf_writer::writers::{PropertyList, StructElement};
use pdf_writer::{Chunk, Finish, Name, Ref, Str, TextStr};
use smallvec::SmallVec;
use crate::configure::{PdfVersion, ValidationError};
use crate::error::{KrillaError, KrillaResult};
use crate::page::page_root_transform;
use crate::serialize::SerializeContext;
pub use tag::*;
pub mod fmt;
mod tag;
/// The kind of an artifact content tag.
///
/// `Header`, `Footer` and `Page` artifacts get a property list when written
/// (see [`ArtifactType::requires_properties`]); `Other` does not.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum ArtifactType {
    /// A page header.
    Header,
    /// A page footer.
    Footer,
    /// A page artifact.
    Page,
    /// Any other artifact; written without properties.
    Other,
}

impl ArtifactType {
    /// Returns `true` for every artifact type except [`ArtifactType::Other`].
    pub(crate) fn requires_properties(&self) -> bool {
        matches!(self, Self::Header | Self::Footer | Self::Page)
    }
}
/// A language string; for span content tags it is written under the `Lang` key.
// NOTE(review): the expected format (e.g. a BCP 47 tag) is not enforced here — confirm with callers.
pub type Lang<'a> = &'a str;
/// An alternate-description string; for span content tags it is written under the `Alt` key.
pub type Alt<'a> = &'a str;
/// A replacement-text string; for span content tags it is written as the actual text.
pub type ActualText<'a> = &'a str;
/// An expansion string; for span content tags it is written under the `E` key.
pub type Expanded<'a> = &'a str;
/// A tag for a piece of page content.
///
/// The variant decides which tag name is emitted (`Artifact`, `Span` or `P`)
/// and which properties, if any, accompany it.
#[derive(Clone, Copy, Debug)]
pub enum ContentTag<'a> {
/// An artifact of the given type.
Artifact(ArtifactType),
/// A span carrying optional language / alternate text / expansion / actual text.
Span(SpanTag<'a>),
/// Any other content; emitted with the name `P` and no properties.
Other,
}
impl ContentTag<'_> {
pub(crate) fn name(&self) -> Name<'static> {
match self {
ContentTag::Artifact(_) => Name(b"Artifact"),
ContentTag::Span(_) => Name(b"Span"),
ContentTag::Other => Name(b"P"),
}
}
pub(crate) fn write_properties(&self, sc: &mut SerializeContext, mut properties: PropertyList) {
match self {
ContentTag::Artifact(at) => {
let mut artifact = properties.artifact();
let artifact_type = match at {
ArtifactType::Header => pdf_writer::types::ArtifactType::Pagination,
ArtifactType::Footer => pdf_writer::types::ArtifactType::Pagination,
ArtifactType::Page => pdf_writer::types::ArtifactType::Page,
ArtifactType::Other => unreachable!(),
};
if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf17 {
if *at == ArtifactType::Header {
artifact.attached([pdf_writer::types::ArtifactAttachment::Top]);
artifact.subtype(ArtifactSubtype::Header);
}
if *at == ArtifactType::Footer {
artifact.attached([pdf_writer::types::ArtifactAttachment::Bottom]);
artifact.subtype(ArtifactSubtype::Footer);
}
}
artifact.kind(artifact_type);
}
ContentTag::Span(SpanTag {
lang,
alt_text,
expanded,
actual_text,
}) => {
if let Some(lang) = lang {
properties.pair(Name(b"Lang"), TextStr(lang));
}
if let Some(alt) = alt_text {
if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf15 {
properties.pair(Name(b"Alt"), TextStr(alt));
}
}
if let Some(exp) = expanded {
properties.pair(Name(b"E"), TextStr(exp));
}
if let Some(actual) = actual_text {
if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf15 {
properties.actual_text(TextStr(actual));
}
}
}
ContentTag::Other => {}
}
}
}
/// The optional properties of a span content tag.
///
/// Every field is optional; unset fields are simply not written.
#[derive(Clone, Copy, Debug)]
pub struct SpanTag<'a> {
    /// The language of the span's content.
    pub lang: Option<Lang<'a>>,
    /// An alternate description of the span's content.
    pub alt_text: Option<Alt<'a>>,
    /// The expansion written under the `E` key.
    pub expanded: Option<Expanded<'a>>,
    /// The replacement text for the span's content.
    pub actual_text: Option<ActualText<'a>>,
}

impl<'a> SpanTag<'a> {
    /// Creates a span tag with no property set.
    pub fn empty() -> Self {
        let (lang, alt_text, expanded, actual_text) = (None, None, None, None);
        Self {
            lang,
            alt_text,
            expanded,
            actual_text,
        }
    }

    /// Returns this tag with `lang` replaced by the given value.
    pub fn with_lang(self, lang: Option<&'a str>) -> Self {
        Self { lang, ..self }
    }

    /// Returns this tag with `alt_text` replaced by the given value.
    pub fn with_alt_text(self, alt_text: Option<&'a str>) -> Self {
        Self { alt_text, ..self }
    }

    /// Returns this tag with `expanded` replaced by the given value.
    pub fn with_expanded(self, expanded: Option<&'a str>) -> Self {
        Self { expanded, ..self }
    }

    /// Returns this tag with `actual_text` replaced by the given value.
    pub fn with_actual_text(self, actual_text: Option<&'a str>) -> Self {
        Self {
            actual_text,
            ..self
        }
    }
}
/// Identifies a piece of tagged page content by page index and marked-content ID.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct PageTagIdentifier {
    /// Index of the page the content lives on.
    pub(crate) page_index: usize,
    /// The marked-content ID within that page.
    pub(crate) mcid: i32,
}

impl From<PageTagIdentifier> for IdentifierType {
    fn from(value: PageTagIdentifier) -> Self {
        Self::PageIdentifier(value)
    }
}

impl From<PageTagIdentifier> for Identifier {
    fn from(value: PageTagIdentifier) -> Self {
        let inner = IdentifierType::from(value);
        Identifier(IdentifierInner::Real(inner))
    }
}

impl PageTagIdentifier {
    /// Creates an identifier from a page index and a marked-content ID.
    pub(crate) fn new(page_index: usize, mcid: i32) -> Self {
        PageTagIdentifier { page_index, mcid }
    }

    /// Advances `mcid` by one and returns the identifier as it was before.
    ///
    /// # Panics
    ///
    /// Panics if incrementing `mcid` overflows `i32`.
    pub(crate) fn bump(&mut self) -> PageTagIdentifier {
        // The successor is computed first, so an overflow panics before `self` changes.
        let successor = Self::new(self.page_index, self.mcid.checked_add(1).unwrap());
        std::mem::replace(self, successor)
    }
}
/// Identifies an annotation by page index and index within that page's annotations.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub(crate) struct AnnotationIdentifier {
    /// Index of the page the annotation lives on.
    pub(crate) page_index: usize,
    /// Index of the annotation within the page's annotations.
    pub(crate) annot_index: usize,
}

impl From<AnnotationIdentifier> for IdentifierType {
    fn from(value: AnnotationIdentifier) -> Self {
        Self::AnnotationIdentifier(value)
    }
}

impl From<AnnotationIdentifier> for Identifier {
    fn from(value: AnnotationIdentifier) -> Self {
        let inner = IdentifierType::from(value);
        Identifier(IdentifierInner::Real(inner))
    }
}

impl AnnotationIdentifier {
    /// Creates an identifier from a page index and an annotation index.
    pub fn new(page_index: usize, annot_index: usize) -> Self {
        AnnotationIdentifier {
            page_index,
            annot_index,
        }
    }
}
/// The two kinds of real identifiers a tag tree leaf can refer to.
///
/// Used as the key type of the parent-tree map during serialization.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) enum IdentifierType {
/// Tagged content on a page.
PageIdentifier(PageTagIdentifier),
/// An annotation on a page.
AnnotationIdentifier(AnnotationIdentifier),
}
/// The payload of an [`Identifier`].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum IdentifierInner {
/// An identifier that refers to actual page content or an annotation.
Real(IdentifierType),
/// A placeholder; leaves holding it are dropped when the tag tree is serialized.
Dummy,
}
/// An opaque identifier that can be placed as a leaf in a tag tree.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Identifier(pub(crate) IdentifierInner);

impl Identifier {
    /// Creates an identifier referring to an annotation on the given page.
    pub(crate) fn new_annotation(page_index: usize, annot_index: usize) -> Self {
        let annotation = AnnotationIdentifier::new(page_index, annot_index);
        Self::from(annotation)
    }

    /// Creates a placeholder identifier that refers to nothing.
    pub(crate) fn dummy() -> Self {
        Identifier(IdentifierInner::Dummy)
    }
}
impl TagKind {
    /// Writes the structure type of this tag into `struct_elem`.
    ///
    /// If the configured PDF version is below [`TagKind::minimum_version`],
    /// the element is written as a plain `P` instead. Otherwise the role is
    /// written through one of the `write_kind_*` helpers, or inline for
    /// headings, `Strong` and `Em`.
    pub(crate) fn write_kind(&self, struct_elem: &mut StructElement, sc: &mut SerializeContext) {
        let version = sc.serialize_settings().pdf_version();
        if version < self.minimum_version() {
            struct_elem.kind(StructRole::P);
            return;
        }

        match self {
            // Roles always written with their PDF 1.7 name.
            Self::Article(_) => write_kind_1_7(struct_elem, StructRole::Art),
            Self::BlockQuote(_) => write_kind_1_7(struct_elem, StructRole::BlockQuote),
            Self::TOC(_) => write_kind_1_7(struct_elem, StructRole::TOC),
            Self::TOCI(_) => write_kind_1_7(struct_elem, StructRole::TOCI),
            Self::Index(_) => write_kind_1_7(struct_elem, StructRole::Index),
            Self::InlineQuote(_) => write_kind_1_7(struct_elem, StructRole::Quote),
            Self::Note(_) => write_kind_1_7(struct_elem, StructRole::Note),
            Self::Reference(_) => write_kind_1_7(struct_elem, StructRole::Reference),
            Self::BibEntry(_) => write_kind_1_7(struct_elem, StructRole::BibEntry),
            Self::Code(_) => write_kind_1_7(struct_elem, StructRole::Code),

            // Roles written version-dependently by `write_kind_compat`.
            Self::Part(_) => write_kind_compat(sc, struct_elem, StructRole2::Part),
            Self::Section(_) => write_kind_compat(sc, struct_elem, StructRole2::Sect),
            Self::Div(_) => write_kind_compat(sc, struct_elem, StructRole2::Div),
            Self::Caption(_) => write_kind_compat(sc, struct_elem, StructRole2::Caption),
            Self::P(_) => write_kind_compat(sc, struct_elem, StructRole2::P),
            Self::L(_) => write_kind_compat(sc, struct_elem, StructRole2::L),
            Self::LI(_) => write_kind_compat(sc, struct_elem, StructRole2::LI),
            Self::Lbl(_) => write_kind_compat(sc, struct_elem, StructRole2::Lbl),
            Self::LBody(_) => write_kind_compat(sc, struct_elem, StructRole2::LBody),
            Self::Table(_) => write_kind_compat(sc, struct_elem, StructRole2::Table),
            Self::TR(_) => write_kind_compat(sc, struct_elem, StructRole2::TR),
            Self::TH(_) => write_kind_compat(sc, struct_elem, StructRole2::TH),
            Self::TD(_) => write_kind_compat(sc, struct_elem, StructRole2::TD),
            Self::THead(_) => write_kind_compat(sc, struct_elem, StructRole2::THead),
            Self::TBody(_) => write_kind_compat(sc, struct_elem, StructRole2::TBody),
            Self::TFoot(_) => write_kind_compat(sc, struct_elem, StructRole2::TFoot),
            Self::Span(_) => write_kind_compat(sc, struct_elem, StructRole2::Span),
            Self::Link(_) => write_kind_compat(sc, struct_elem, StructRole2::Link),
            Self::Annot(_) => write_kind_compat(sc, struct_elem, StructRole2::Annot),
            Self::Figure(_) => write_kind_compat(sc, struct_elem, StructRole2::Figure),
            Self::Formula(_) => write_kind_compat(sc, struct_elem, StructRole2::Formula),
            Self::Form(_) => write_kind_compat(sc, struct_elem, StructRole2::Form),
            Self::NonStruct(_) => write_kind_compat(sc, struct_elem, StructRole2::NonStruct),

            // Custom names written by `write_kind_custom`.
            Self::Datetime(_) => write_kind_custom(sc, struct_elem, Name(b"Datetime")),
            Self::Terms(_) => write_kind_custom(sc, struct_elem, Name(b"Terms")),
            Self::Title(_) => write_kind_custom(sc, struct_elem, Name(b"Title")),

            Self::Hn(heading) => {
                let role = StructRole2::Heading(heading.level());
                if version < PdfVersion::Pdf20 {
                    // Heading levels without a PDF 1.7 equivalent are recorded
                    // in the global objects.
                    let legacy = role.compatibility_1_7(RoleMapOpts::default());
                    if legacy.into_pdf_1_7().is_none() {
                        sc.global_objects
                            .custom_heading_roles
                            .insert(heading.level());
                    }
                    struct_elem.custom_kind(role.to_name(&mut [0; 6]));
                } else {
                    struct_elem.kind_2(role, sc.pdf2_ns.ssn_ref);
                }
            }

            // Custom name before PDF 2.0, `StructRole2` from PDF 2.0 on.
            Self::Strong(_) | Self::Em(_) => {
                let (legacy_name, role) = if matches!(self, Self::Strong(_)) {
                    (Name(b"Strong"), StructRole2::Strong)
                } else {
                    (Name(b"Em"), StructRole2::Em)
                };
                if version < PdfVersion::Pdf20 {
                    struct_elem.custom_kind(legacy_name);
                } else {
                    struct_elem.kind_2(role, sc.pdf2_ns.ssn_ref);
                }
            }
        };
    }

    /// Returns the lowest PDF version for which this tag is written as itself.
    ///
    /// `THead`, `TBody`, `TFoot` and `Annot` require PDF 1.5; everything else
    /// is available from PDF 1.4.
    pub(crate) fn minimum_version(&self) -> PdfVersion {
        match self {
            Self::THead(_) | Self::TBody(_) | Self::TFoot(_) | Self::Annot(_) => PdfVersion::Pdf15,
            Self::Part(_)
            | Self::Article(_)
            | Self::Section(_)
            | Self::Div(_)
            | Self::BlockQuote(_)
            | Self::Caption(_)
            | Self::TOC(_)
            | Self::TOCI(_)
            | Self::Index(_)
            | Self::P(_)
            | Self::Hn(_)
            | Self::L(_)
            | Self::LI(_)
            | Self::Lbl(_)
            | Self::LBody(_)
            | Self::Table(_)
            | Self::TR(_)
            | Self::TH(_)
            | Self::TD(_)
            | Self::Span(_)
            | Self::InlineQuote(_)
            | Self::Note(_)
            | Self::Reference(_)
            | Self::BibEntry(_)
            | Self::Code(_)
            | Self::Link(_)
            | Self::Figure(_)
            | Self::Formula(_)
            | Self::Form(_)
            | Self::NonStruct(_)
            | Self::Datetime(_)
            | Self::Terms(_)
            | Self::Title(_)
            | Self::Strong(_)
            | Self::Em(_) => PdfVersion::Pdf14,
        }
    }

    /// Returns `true` for figures and formulas; a missing alternate text on
    /// those is reported as a validation error during serialization.
    pub(crate) fn should_have_alt(&self) -> bool {
        match self {
            Self::Figure(_) | Self::Formula(_) => true,
            _ => false,
        }
    }

    /// Returns `true` for heading tags.
    pub(crate) fn can_have_title(&self) -> bool {
        matches!(self, TagKind::Hn(_))
    }
}
/// Writes `role` as the structure type of `struct_elem`, independent of the
/// PDF version being written.
fn write_kind_1_7(struct_elem: &mut StructElement, role: StructRole) {
struct_elem.kind(role);
}
/// Writes `role` as the structure type of `struct_elem`.
///
/// Below PDF 2.0 the role's PDF 1.7 compatibility role is written; from
/// PDF 2.0 on the role itself is written together with the `ssn_ref`
/// namespace reference.
fn write_kind_compat(
    sc: &mut SerializeContext,
    struct_elem: &mut StructElement,
    role: StructRole2,
) {
    let version = sc.serialize_settings().pdf_version();
    if version < PdfVersion::Pdf20 {
        let legacy_role = role.compatibility_1_7(RoleMapOpts::default()).role();
        struct_elem.kind(legacy_role);
        return;
    }
    struct_elem.kind_2(role, sc.pdf2_ns.ssn_ref);
}
/// Writes the custom structure type `name` into `struct_elem`; from PDF 2.0 on
/// the element is additionally assigned the `krilla_ref` namespace.
fn write_kind_custom(sc: &mut SerializeContext, struct_elem: &mut StructElement, name: Name) {
    struct_elem.custom_kind(name);
    let version = sc.serialize_settings().pdf_version();
    if version >= PdfVersion::Pdf20 {
        let namespace = sc.pdf2_ns.krilla_ref;
        struct_elem.namespace(namespace);
    }
}
/// A node of the tag tree.
#[derive(Debug, Clone, PartialEq)]
pub enum Node {
/// A tag group, which can itself contain further nodes.
Group(TagGroup),
/// A leaf referring to content via an identifier.
Leaf(Identifier),
}
impl Node {
pub(crate) fn serialize(
&self,
sc: &mut SerializeContext,
parent_tree_map: &mut HashMap<IdentifierType, Ref>,
id_tree: &mut BTreeMap<TagId, Ref>,
parent: Ref,
note_id: &mut u32,
struct_elems: &mut Vec<Chunk>,
) -> KrillaResult<Option<Reference>> {
match self {
Node::Group(g) => Ok(Some(g.serialize(
sc,
parent_tree_map,
id_tree,
parent,
note_id,
struct_elems,
)?)),
Node::Leaf(ci) => match ci.0 {
IdentifierInner::Real(rci) => Ok(Some(Reference::ContentIdentifier(rci))),
IdentifierInner::Dummy => Ok(None),
},
}
}
}
impl From<TagGroup> for Node {
fn from(value: TagGroup) -> Self {
Node::Group(value)
}
}
impl From<Identifier> for Node {
fn from(value: Identifier) -> Self {
Node::Leaf(value)
}
}
/// The result of serializing a tag tree node: what the parent structure
/// element has to list as its child.
#[derive(Clone, Copy)]
pub(crate) enum Reference {
/// A reference to a serialized structure element.
Ref(Ref),
/// A piece of page content or an annotation.
ContentIdentifier(IdentifierType),
}
/// A tag together with the child nodes it groups.
#[derive(Debug, Clone, PartialEq)]
pub struct TagGroup {
/// The tag of this group.
pub tag: TagKind,
/// The children of this group, in order.
pub children: Vec<Node>,
}
impl TagGroup {
/// Creates a tag group with the given tag and no children.
pub fn new(tag: impl Into<TagKind>) -> Self {
Self {
tag: tag.into(),
children: vec![],
}
}
/// Creates a tag group with the given tag and children.
pub fn with_children(tag: impl Into<TagKind>, children: Vec<Node>) -> Self {
Self {
tag: tag.into(),
children,
}
}
/// Appends a child node to this group.
pub fn push(&mut self, child: impl Into<Node>) {
self.children.push(child.into())
}
/// Serializes this group and, recursively, all of its children.
///
/// The group's structure element is written into a fresh chunk that is
/// pushed onto `struct_elems`; children are serialized first, so their
/// chunks precede this group's chunk in `struct_elems`.
///
/// - `parent_tree_map` records, per content identifier, a reference for the parent tree.
/// - `id_tree` records the reference of every element that carries an ID.
/// - `parent_ref` is written as this element's parent.
/// - `note_id` is the counter used to generate IDs for `Note` tags without an explicit ID.
///
/// Returns a [`Reference::Ref`] to the written structure element.
///
/// # Errors
///
/// Returns `KrillaError::DuplicateTagId` if the tag's ID is already in `id_tree`,
/// and propagates errors from serializing children.
///
/// # Panics
///
/// Panics if a `BBox` layout attribute refers to a page index that does not
/// exist, or if transforming its rectangle fails.
pub(crate) fn serialize(
&self,
sc: &mut SerializeContext,
parent_tree_map: &mut HashMap<IdentifierType, Ref>,
id_tree: &mut BTreeMap<TagId, Ref>,
parent_ref: Ref,
note_id: &mut u32,
struct_elems: &mut Vec<Chunk>,
) -> KrillaResult<Reference> {
// The element's own reference is allocated up front because the children
// need it as their parent reference.
let elem_ref = sc.new_ref();
let mut children_refs = vec![];
for child in &self.children {
let serialized = child.serialize(
sc,
parent_tree_map,
id_tree,
elem_ref,
note_id,
struct_elems,
)?;
// Dummy leaves serialize to `None` and are skipped.
if let Some(ref_) = serialized {
children_refs.push(ref_);
}
}
let mut chunk = Chunk::new();
let mut struct_elem = chunk.struct_element(elem_ref);
self.tag.write_kind(&mut struct_elem, sc);
struct_elem.parent(parent_ref);
let tag = self.tag.as_any();
let pdf_version = sc.serialize_settings().pdf_version();
if let Some(id) = tag.id() {
// Explicit IDs must be unique across the whole tag tree.
match id_tree.entry(id.clone()) {
Entry::Vacant(vacant) => {
struct_elem.id(Str(id.as_bytes()));
vacant.insert(elem_ref);
}
Entry::Occupied(_) => {
return Err(KrillaError::DuplicateTagId(id.clone(), tag.location));
}
}
} else if matches!(self.tag, TagKind::Note(_)) {
// Notes without an explicit ID get a generated one of the form "Note <n>".
let mut id = TagId(SmallVec::new());
// The result of the write is deliberately ignored.
_ = write!(&mut id.0, "Note {note_id}");
struct_elem.id(Str(id.as_bytes()));
id_tree.insert(id, elem_ref);
*note_id += 1;
}
// A missing or empty title/alt text is reported as a validation error
// rather than failing serialization.
if self.tag.can_have_title() && tag.title().is_none_or(str::is_empty) {
sc.register_validation_error(ValidationError::MissingHeadingTitle);
}
if self.tag.should_have_alt() && tag.alt_text().is_none_or(str::is_empty) {
sc.register_validation_error(ValidationError::MissingAltText(tag.location));
}
// First pass: attributes that are entries of the structure element itself.
for attr in tag.attrs.iter() {
let Attr::Struct(attr) = attr else {
continue;
};
match attr {
// IDs were handled above; heading levels are part of the structure type.
StructAttr::Id(_) => (), StructAttr::Title(title) => {
struct_elem.title(TextStr(title));
}
StructAttr::Lang(lang) => {
if pdf_version >= PdfVersion::Pdf14 {
struct_elem.lang(TextStr(lang));
}
}
StructAttr::AltText(alt) => {
struct_elem.alt(TextStr(alt));
}
StructAttr::Expanded(expanded) => {
if pdf_version >= PdfVersion::Pdf15 {
struct_elem.expanded(TextStr(expanded));
}
}
StructAttr::ActualText(actual_text) => {
if pdf_version >= PdfVersion::Pdf14 {
struct_elem.actual_text(TextStr(actual_text));
}
}
StructAttr::HeadingLevel(_) => (),
}
}
// The attribute writers are wrapped in `LazyCell`s, so each one is only
// created on first use.
// NOTE(review): presumably this avoids emitting empty attribute
// dictionaries when a tag has no attributes of a given owner — confirm.
let mut attributes = LazyCell::new(|| struct_elem.attributes());
let mut list_attributes = LazyCell::new(|| attributes.push().list());
for attr in tag.attrs.iter() {
let Attr::List(attr) = attr else {
continue;
};
match attr {
ListAttr::Numbering(numbering) => {
list_attributes.list_numbering(numbering.to_pdf());
}
}
}
// Each lazy writer is finished before the next one borrows `attributes`.
list_attributes.finish();
let mut table_attributes = LazyCell::new(|| attributes.push().table());
for attr in tag.attrs.iter() {
let Attr::Table(attr) = attr else {
continue;
};
match attr {
TableAttr::Summary(summary) => {
if pdf_version >= PdfVersion::Pdf17 {
table_attributes.summary(TextStr(summary));
}
}
TableAttr::HeaderScope(scope) => {
if pdf_version >= PdfVersion::Pdf15 {
table_attributes.scope(scope.to_pdf());
}
}
TableAttr::CellHeaders(headers) => {
if pdf_version >= PdfVersion::Pdf15 {
let id_strs = headers.iter().map(|id| Str(id.as_bytes()));
table_attributes.headers().items(id_strs);
}
}
TableAttr::RowSpan(n) => {
table_attributes.row_span(n.get() as i32);
}
TableAttr::ColSpan(n) => {
table_attributes.col_span(n.get() as i32);
}
}
}
table_attributes.finish();
let mut layout_attributes = LazyCell::new(|| attributes.push().layout());
for attr in tag.attrs.iter() {
let Attr::Layout(attr) = attr else {
continue;
};
match attr {
LayoutAttr::Placement(placement) => {
layout_attributes.placement(placement.to_pdf());
}
LayoutAttr::WritingMode(writing_mode) => {
layout_attributes.writing_mode(writing_mode.to_pdf());
}
&LayoutAttr::BBox(BBox { page_idx, rect }) => {
let Some(page_info) = sc.page_infos().get(page_idx) else {
panic!(
"tag tree contains bounding box with page index {page_idx}, \
but document only has {} pages",
sc.page_infos().len()
);
};
// The rectangle is transformed with the page's root transform,
// derived from the page height, before it is written.
let transform = page_root_transform(page_info.size().height());
let actual_rect = rect.transform(transform).unwrap();
layout_attributes.bbox(actual_rect.to_pdf_rect());
}
&LayoutAttr::Width(width) => {
layout_attributes.width(width);
}
&LayoutAttr::Height(height) => {
layout_attributes.height(height);
}
&LayoutAttr::BackgroundColor(color) => {
if pdf_version >= PdfVersion::Pdf15 {
layout_attributes.background_color(color.into());
}
}
LayoutAttr::BorderColor(sides) => {
if pdf_version >= PdfVersion::Pdf15 {
let sides = sides.map_pdf(NaiveRgbColor::into_f32_array);
layout_attributes.border_color(sides);
}
}
LayoutAttr::BorderStyle(sides) => {
if pdf_version >= PdfVersion::Pdf15 {
let sides = sides.map_pdf(BorderStyle::to_pdf);
layout_attributes.border_style(sides);
}
}
LayoutAttr::BorderThickness(sides) => {
if pdf_version >= PdfVersion::Pdf15 {
layout_attributes.border_thickness(sides.into_pdf());
}
}
LayoutAttr::Padding(sides) => {
if pdf_version >= PdfVersion::Pdf15 {
layout_attributes.padding(sides.into_pdf());
}
}
&LayoutAttr::Color(color) => {
if pdf_version >= PdfVersion::Pdf15 {
layout_attributes.color(color.into());
}
}
&LayoutAttr::SpaceBefore(margin) => {
layout_attributes.space_before(margin);
}
&LayoutAttr::SpaceAfter(margin) => {
layout_attributes.space_after(margin);
}
&LayoutAttr::StartIndent(margin) => {
layout_attributes.start_indent(margin);
}
&LayoutAttr::EndIndent(margin) => {
layout_attributes.end_indent(margin);
}
&LayoutAttr::TextIndent(indent) => {
layout_attributes.text_indent(indent);
}
LayoutAttr::BlockAlign(alignment) => {
layout_attributes.block_align(alignment.to_pdf());
}
LayoutAttr::InlineAlign(alignment) => {
layout_attributes.inline_align(alignment.to_pdf());
}
LayoutAttr::TextAlign(alignment) => {
layout_attributes.text_align(alignment.to_pdf());
}
LayoutAttr::TableBorderStyle(sides) => {
if pdf_version >= PdfVersion::Pdf15 {
let sides = sides.map_pdf(BorderStyle::to_pdf);
layout_attributes.table_border_style(sides);
}
}
LayoutAttr::TablePadding(sides) => {
if pdf_version >= PdfVersion::Pdf15 {
layout_attributes.table_padding(sides.into_pdf());
}
}
&LayoutAttr::BaselineShift(shift) => {
layout_attributes.baseline_shift(shift);
}
LayoutAttr::LineHeight(height) => {
layout_attributes.line_height(height.to_pdf());
}
&LayoutAttr::TextDecorationColor(color) => {
if pdf_version >= PdfVersion::Pdf15 {
layout_attributes.text_decoration_color(color.into());
}
}
&LayoutAttr::TextDecorationThickness(thickness) => {
if pdf_version >= PdfVersion::Pdf15 {
layout_attributes.text_decoration_thickness(thickness);
}
}
LayoutAttr::TextDecorationType(style) => {
if pdf_version >= PdfVersion::Pdf15 {
layout_attributes.text_decoration_type(style.to_pdf());
}
}
&LayoutAttr::GlyphOrientationVertical(orientation) => {
if pdf_version >= PdfVersion::Pdf15 {
layout_attributes.glyph_orientation_vertical(orientation.to_pdf());
}
}
LayoutAttr::ColumnCount(columns) => {
if pdf_version >= PdfVersion::Pdf16 {
layout_attributes.column_count(columns.get() as i32);
}
}
LayoutAttr::ColumnGap(gap) => {
if pdf_version >= PdfVersion::Pdf16 {
let sizes = layout_attributes.column_gap();
match gap {
ColumnDimensions::All(gap) => sizes.uniform(*gap),
ColumnDimensions::Specific(values) => {
sizes.individual().items(values.iter().copied());
}
}
}
}
LayoutAttr::ColumnWidths(width) => {
if pdf_version >= PdfVersion::Pdf16 {
let sizes = layout_attributes.column_widths();
match width {
ColumnDimensions::All(width) => sizes.uniform(*width),
ColumnDimensions::Specific(values) => {
sizes.individual().items(values.iter().copied());
}
}
}
}
}
}
layout_attributes.finish();
// Finishing `attributes` releases its borrow of `struct_elem`, which is
// needed again below.
attributes.finish();
serialize_children(
sc,
elem_ref,
children_refs,
parent_tree_map,
&mut struct_elem,
)?;
struct_elem.finish();
struct_elems.push(chunk);
Ok(Reference::Ref(elem_ref))
}
/// Checks that every header ID referenced by this group's tag, and by the
/// tags of all nested groups, is present in `id_tree`.
///
/// # Errors
///
/// Returns `KrillaError::UnknownTagId` for the first header ID that is missing.
fn validate(&self, id_tree: &BTreeMap<TagId, Ref>) -> KrillaResult<()> {
if let Some(headers) = self.tag.headers() {
for id in headers.iter() {
if !id_tree.contains_key(id) {
return Err(KrillaError::UnknownTagId(id.clone(), self.tag.location()));
}
}
}
// Leaves carry no tag, so only nested groups are validated.
for child in self.children.iter() {
if let Node::Group(group) = child {
group.validate(id_tree)?;
}
}
Ok(())
}
}
/// The root of a tag tree.
#[derive(Default)]
pub struct TagTree {
/// The top-level nodes of the tree, in order.
pub children: Vec<Node>,
/// The language written on the root structure element, if any.
pub lang: Option<String>,
}
impl From<Vec<Node>> for TagTree {
fn from(children: Vec<Node>) -> Self {
Self {
children,
lang: None,
}
}
}
impl TagTree {
    /// Creates an empty tag tree without a language.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns this tree with its language replaced by `lang`.
    pub fn with_lang(self, lang: Option<String>) -> Self {
        Self { lang, ..self }
    }

    /// Appends a top-level node to the tree.
    pub fn push(&mut self, child: impl Into<Node>) {
        let node: Node = child.into();
        self.children.push(node);
    }

    /// Serializes the whole tree below a root `Document` structure element.
    ///
    /// Returns the reference of the root element together with the chunks of
    /// all structure elements. The chunks are returned in reverse order of
    /// creation, so the root element's chunk comes first.
    ///
    /// # Errors
    ///
    /// Propagates any error from serializing the children.
    pub(crate) fn serialize(
        &self,
        sc: &mut SerializeContext,
        parent_tree_map: &mut HashMap<IdentifierType, Ref>,
        id_tree_map: &mut BTreeMap<TagId, Ref>,
        struct_tree_ref: Ref,
    ) -> KrillaResult<(Ref, Vec<Chunk>)> {
        // Allocated before the children are serialized; they use it as parent.
        let root_ref = sc.new_ref();
        let mut struct_elems = Vec::new();
        // Generated note IDs start counting at 1.
        let mut next_note_id = 1;
        let mut children_refs = Vec::new();
        for child in self.children.iter() {
            // `extend` over the `Option` skips nodes that serialize to `None`.
            children_refs.extend(child.serialize(
                sc,
                parent_tree_map,
                id_tree_map,
                root_ref,
                &mut next_note_id,
                &mut struct_elems,
            )?);
        }

        let mut chunk = Chunk::new();
        let mut struct_elem = chunk.indirect(root_ref).start::<StructElement>();
        struct_elem.kind(StructRole::Document);
        struct_elem.parent(struct_tree_ref);
        match &self.lang {
            Some(lang) if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf14 => {
                struct_elem.lang(TextStr(lang));
            }
            _ => {}
        }
        serialize_children(
            sc,
            root_ref,
            children_refs,
            parent_tree_map,
            &mut struct_elem,
        )?;
        struct_elem.finish();
        struct_elems.push(chunk);

        struct_elems.reverse();
        Ok((root_ref, struct_elems))
    }

    /// Validates every tag group in the tree against `id_tree`.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by a group's validation.
    pub(crate) fn validate(&self, id_tree: &BTreeMap<TagId, Ref>) -> KrillaResult<()> {
        self.children.iter().try_for_each(|child| match child {
            Node::Group(group) => group.validate(id_tree),
            Node::Leaf(_) => Ok(()),
        })
    }
}
/// Writes the children of a structure element and records each content child
/// in `parent_tree_map`.
///
/// - `parent_ref` is the reference of the structure element that owns the
///   children; it is what gets recorded as the parent of every content
///   identifier and annotation.
/// - `children_refs` are the already serialized children, in order.
/// - `struct_elem` is the writer of the owning structure element.
///
/// The page of the first page-content child becomes the element's page entry;
/// content on that page is written as a bare marked-content ID, content on any
/// other page as a marked-content reference that names its page.
///
/// # Panics
///
/// Panics if an identifier refers to a page or annotation that does not exist,
/// if the same identifier appears twice in the tag tree, or if an annotation
/// already has a structure parent.
fn serialize_children(
    sc: &mut SerializeContext,
    parent_ref: Ref,
    children_refs: Vec<Reference>,
    parent_tree_map: &mut HashMap<IdentifierType, Ref>,
    struct_elem: &mut StructElement,
) -> KrillaResult<()> {
    let mut struct_page_ref = None;
    let mut struct_children = struct_elem.children();
    for child in children_refs {
        match child {
            Reference::Ref(r) => {
                struct_children.struct_element(r);
            }
            Reference::ContentIdentifier(it) => match it {
                IdentifierType::PageIdentifier(pi) => {
                    let page_ref = sc
                        .page_infos()
                        .get(pi.page_index)
                        .unwrap_or_else(|| panic!("tag tree contains identifier from page {}, but document only has {} pages",
                            pi.page_index + 1,
                            sc.page_infos().len()))
                        .ref_();
                    // The first page encountered becomes the element's page.
                    if struct_page_ref.is_none() {
                        struct_page_ref = Some(page_ref);
                    }
                    if parent_tree_map.contains_key(&pi.into()) {
                        panic!("the identifier {pi:?} appears twice in the tag tree");
                    }
                    parent_tree_map.insert(pi.into(), parent_ref);
                    if struct_page_ref == Some(page_ref) {
                        struct_children.marked_content_id(pi.mcid);
                    } else {
                        struct_children
                            .marked_content_ref()
                            .marked_content_id(pi.mcid)
                            .page(page_ref);
                    }
                }
                IdentifierType::AnnotationIdentifier(ai) => {
                    let Some(page_info) = sc.page_infos_mut().get_mut(ai.page_index) else {
                        panic!(
                            "tag tree contains identifier from page {}, but document only has {} pages",
                            ai.page_index + 1,
                            sc.page_infos().len()
                        );
                    };
                    let page_ref = page_info.ref_();
                    let Some((annotation_ref, struct_parent)) =
                        page_info.annotations_mut().get_mut(ai.annot_index)
                    else {
                        panic!(
                            "tag tree contains identifier from annotation {} on page {}, but page only has {} annotations",
                            ai.annot_index + 1,
                            ai.page_index + 1,
                            page_info.annotations().len()
                        );
                    };
                    if parent_tree_map.contains_key(&ai.into()) {
                        panic!("identifier {ai:?} appears twice in the tag tree");
                    }
                    // The map records the *parent structure element* of each
                    // identifier, exactly as for page content above; storing
                    // the annotation's own reference would make the
                    // annotation its own parent.
                    parent_tree_map.insert(ai.into(), parent_ref);
                    struct_parent.set(parent_ref).expect("only one parent");
                    struct_children
                        .object_ref()
                        .page(page_ref)
                        .object(*annotation_ref);
                }
            },
        }
    }
    struct_children.finish();
    if let Some(spr) = struct_page_ref {
        struct_elem.page(spr);
    }
    Ok(())
}
/// An edge that an artifact can be attached to.
// NOTE(review): nothing in this part of the file uses this type (the artifact
// writer uses `pdf_writer::types::ArtifactAttachment` directly) — confirm
// where it is consumed.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[allow(missing_docs)]
pub enum ArtifactAttachment {
Left,
Top,
Right,
Bottom,
}