use pdf_compliance::tagged::{StructElement, StructureTree};
/// Document-level view of a tagged PDF's logical structure.
///
/// Produced by `from_structure_tree`, which fills `language` from the source
/// tree's `lang` and `elements` from its root elements.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct DocumentStructure {
    /// Language recorded on the structure tree itself, if any
    /// (the unit tests use `"en-US"`).
    pub language: Option<String>,
    /// Top-level structure nodes, one per root element of the source tree
    /// and in the same order.
    pub elements: Vec<StructureNode>,
}
/// One node of the converted structure tree, together with its subtree.
///
/// Values are copied out of a `StructElement` by the `From<&StructElement>`
/// conversion; see the per-field notes for which source field feeds which.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct StructureNode {
    /// Tag taken from the source element's `standard_type`. It can differ
    /// from `raw_tag` (the tests convert a `"Sect"` element whose standard
    /// type is `"H2"`).
    pub tag: String,
    /// Tag taken verbatim from the source element's `struct_type`.
    pub raw_tag: String,
    /// Copy of the source element's `alt`, if present.
    pub alt_text: Option<String>,
    /// Copy of the source element's `actual_text`, if present.
    pub actual_text: Option<String>,
    /// Copy of the source element's `lang`, if present.
    pub language: Option<String>,
    /// Copy of the source element's `page_index`, if present.
    /// NOTE(review): presumably a zero-based page index; confirm against
    /// `pdf_compliance`.
    pub page: Option<usize>,
    /// Result of `StructElement::heading_level()` for the source element.
    /// The tests expect `Some(1)` for `H1`, `Some(2)` for `H2` and `None`
    /// for `Document` / `Figure`.
    pub heading_level: Option<u8>,
    /// Converted child nodes, in the same order as the source element's
    /// children.
    pub children: Vec<StructureNode>,
}
impl From<&StructElement> for StructureNode {
fn from(element: &StructElement) -> Self {
StructureNode {
tag: element.standard_type.clone(),
raw_tag: element.struct_type.clone(),
alt_text: element.alt.clone(),
actual_text: element.actual_text.clone(),
language: element.lang.clone(),
page: element.page_index,
heading_level: element.heading_level(),
children: element.children.iter().map(StructureNode::from).collect(),
}
}
}
pub(crate) fn from_structure_tree(tree: StructureTree) -> DocumentStructure {
DocumentStructure {
language: tree.lang,
elements: tree.root_elements.iter().map(StructureNode::from).collect(),
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use pdf_compliance::tagged::{StructElement, StructureTree};
    use std::collections::HashMap;

    /// Builds a `StructElement` fixture with the given raw type, standard
    /// type, alt text and children. Every fixture has `page_index` set to
    /// `Some(0)` and no actual text, language or MCIDs.
    fn elem(
        struct_type: &str,
        standard: &str,
        alt: Option<&str>,
        children: Vec<StructElement>,
    ) -> StructElement {
        StructElement {
            struct_type: struct_type.to_owned(),
            standard_type: standard.to_owned(),
            alt: alt.map(String::from),
            actual_text: None,
            lang: None,
            mcids: Vec::new(),
            page_index: Some(0),
            children,
        }
    }

    /// A `Document` root with an `H1` and a `Figure` child converts into the
    /// same shape, carrying language, heading level, alt text and page along.
    #[test]
    fn converts_nested_tree_with_headings_and_alt() {
        let heading_elem = elem("H1", "H1", None, Vec::new());
        let figure_elem = elem("Figure", "Figure", Some("A diagram"), Vec::new());
        let document_elem = elem(
            "Document",
            "Document",
            None,
            vec![heading_elem, figure_elem],
        );
        let tree = StructureTree {
            role_map: HashMap::new(),
            lang: Some(String::from("en-US")),
            root_elements: vec![document_elem],
        };

        let structure = from_structure_tree(tree);

        // Document-level expectations.
        assert_eq!(structure.language.as_deref(), Some("en-US"));
        assert_eq!(structure.elements.len(), 1);

        // The single root is the `Document` node with two children.
        let doc = &structure.elements[0];
        assert_eq!(doc.tag, "Document");
        assert_eq!(doc.heading_level, None);
        assert_eq!(doc.children.len(), 2);

        let (heading, figure) = (&doc.children[0], &doc.children[1]);

        assert_eq!(heading.tag, "H1");
        assert_eq!(heading.heading_level, Some(1));

        assert_eq!(figure.tag, "Figure");
        assert_eq!(figure.alt_text.as_deref(), Some("A diagram"));
        assert_eq!(figure.heading_level, None);
        assert_eq!(figure.page, Some(0));
    }

    /// When the raw type and the standard type differ, `raw_tag` keeps the
    /// former, `tag` carries the latter, and the heading level follows the
    /// standard type.
    #[test]
    fn role_mapped_tag_differs_from_raw_tag() {
        let source = elem("Sect", "H2", None, Vec::new());
        let node = StructureNode::from(&source);

        assert_eq!(node.raw_tag, "Sect");
        assert_eq!(node.tag, "H2");
        assert_eq!(node.heading_level, Some(2));
    }
}