use crate::object::Object;
use std::collections::HashMap;
/// Root container of a document's structure tree.
///
/// Bundles the top-level structure elements with an optional parent tree and
/// a role map.
///
/// NOTE(review): the field names look like the entries of a tagged-PDF
/// `/StructTreeRoot` dictionary — confirm against the parser that fills
/// this type in.
#[derive(Debug, Clone)]
pub struct StructTreeRoot {
    /// Top-level structure elements, kept in insertion order.
    pub root_elements: Vec<StructElem>,
    /// Optional reverse-lookup table; `None` when no parent tree is attached.
    pub parent_tree: Option<ParentTree>,
    /// String-to-string role mapping.
    ///
    /// Presumably maps a custom structure type name to the standard type it
    /// stands for; verify against the code that populates it.
    pub role_map: HashMap<String, String>,
}
impl StructTreeRoot {
    /// Creates an empty structure tree root: no root elements, no parent
    /// tree, and an empty role map.
    pub fn new() -> Self {
        let root_elements = Vec::new();
        let role_map = HashMap::new();
        Self {
            root_elements,
            parent_tree: None,
            role_map,
        }
    }

    /// Appends `elem` after the root elements already present.
    pub fn add_root_element(&mut self, elem: StructElem) {
        let roots = &mut self.root_elements;
        roots.push(elem);
    }
}
impl Default for StructTreeRoot {
fn default() -> Self {
Self::new()
}
}
/// One node of the structure tree.
///
/// NOTE(review): the optional text fields look like the `/Alt`, `/E` and
/// `/ActualText` entries of a tagged-PDF structure element — confirm against
/// the parser that fills them in.
#[derive(Debug, Clone)]
pub struct StructElem {
    /// Structure type of this element.
    pub struct_type: StructType,
    /// Child nodes, kept in insertion order.
    pub children: Vec<StructChild>,
    /// Page associated with the element, if any.
    ///
    /// Whether this is a zero-based index or a page number is not visible
    /// here — TODO confirm.
    pub page: Option<u32>,
    /// Attribute values keyed by attribute name.
    pub attributes: HashMap<String, Object>,
    /// Alternate description, if present.
    pub alt_text: Option<String>,
    /// Expanded form (presumably of an abbreviation), if present.
    pub expansion: Option<String>,
    /// Replacement text for the element's content, if present.
    pub actual_text: Option<String>,
    /// Presumably the original role name before any role mapping — verify.
    pub source_role: Option<String>,
}
impl StructElem {
    /// Creates an element of the given type with no children, no page, no
    /// attributes, and every optional text field unset.
    pub fn new(struct_type: StructType) -> Self {
        let children = Vec::new();
        let attributes = HashMap::new();
        Self {
            struct_type,
            children,
            page: None,
            attributes,
            alt_text: None,
            expansion: None,
            actual_text: None,
            source_role: None,
        }
    }

    /// Appends `child` after the children already present.
    pub fn add_child(&mut self, child: StructChild) {
        let kids = &mut self.children;
        kids.push(child);
    }
}
/// A child entry of a [`StructElem`].
#[derive(Debug, Clone)]
pub enum StructChild {
    /// A nested structure element, stored on the heap.
    StructElem(Box<StructElem>),
    /// A reference to marked content, identified by a marked-content id and
    /// the page it belongs to.
    MarkedContentRef {
        /// Marked-content identifier.
        mcid: u32,
        /// Page the marked content belongs to (index base not visible here —
        /// TODO confirm).
        page: u32,
    },
    /// A reference to another object; presumably `(object number,
    /// generation number)` — verify against the parser.
    ObjectRef(u32, u16),
}
/// Structure type of a structure element.
///
/// Each unit variant corresponds to the identically spelled name accepted by
/// `StructType::from_str`; any other name is carried in [`StructType::Custom`].
///
/// NOTE(review): the variant names look like the standard structure types of
/// tagged PDF — confirm the intended coverage against the specification.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StructType {
    // Grouping elements.
    Document,
    Part,
    Art,
    Sect,
    Div,

    // Paragraph and headings (`H` carries no explicit level).
    P,
    H,
    H1,
    H2,
    H3,
    H4,
    H5,
    H6,

    // List elements.
    L,
    LI,
    Lbl,
    LBody,

    // Table elements.
    Table,
    TR,
    TH,
    TD,
    THead,
    TBody,
    TFoot,

    // Inline-level elements.
    Span,
    Quote,
    Note,
    Reference,
    BibEntry,
    Code,
    Link,
    Annot,

    // Word break marker.
    WB,

    // Illustration elements.
    Figure,
    Formula,
    Form,

    /// Any type name that is not one of the variants above, kept verbatim.
    Custom(String),
}
impl StructType {
    /// Converts a structure type name into a [`StructType`].
    ///
    /// Matching is exact and case-sensitive. A name that is not one of the
    /// known variants is not rejected: it is stored verbatim in
    /// [`StructType::Custom`], so this conversion never fails.
    // Kept as an inherent, infallible constructor for existing callers;
    // `std::str::FromStr` would force a `Result` return type.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &str) -> Self {
        match s {
            "Document" => Self::Document,
            "Part" => Self::Part,
            "Art" => Self::Art,
            "Sect" => Self::Sect,
            "Div" => Self::Div,
            "P" => Self::P,
            "H" => Self::H,
            "H1" => Self::H1,
            "H2" => Self::H2,
            "H3" => Self::H3,
            "H4" => Self::H4,
            "H5" => Self::H5,
            "H6" => Self::H6,
            "L" => Self::L,
            "LI" => Self::LI,
            "Lbl" => Self::Lbl,
            "LBody" => Self::LBody,
            "Table" => Self::Table,
            "TR" => Self::TR,
            "TH" => Self::TH,
            "TD" => Self::TD,
            "THead" => Self::THead,
            "TBody" => Self::TBody,
            "TFoot" => Self::TFoot,
            "Span" => Self::Span,
            "Quote" => Self::Quote,
            "Note" => Self::Note,
            "Reference" => Self::Reference,
            "BibEntry" => Self::BibEntry,
            "Code" => Self::Code,
            "Link" => Self::Link,
            "Annot" => Self::Annot,
            "WB" => Self::WB,
            "Figure" => Self::Figure,
            "Formula" => Self::Formula,
            "Form" => Self::Form,
            _ => Self::Custom(s.to_string()),
        }
    }

    /// Returns `true` for `H` and `H1` through `H6`.
    pub fn is_heading(&self) -> bool {
        matches!(
            self,
            Self::H | Self::H1 | Self::H2 | Self::H3 | Self::H4 | Self::H5 | Self::H6
        )
    }

    /// Returns `true` for the grouping types (`Document`, `Part`, `Art`,
    /// `Sect`, `Div`), paragraphs, headings, `Table`, `Figure` and `Formula`.
    ///
    /// List types, table sub-parts, `Form` and `Custom` types are not
    /// reported as blocks.
    pub fn is_block(&self) -> bool {
        matches!(
            self,
            Self::Document
                | Self::Part
                | Self::Art
                | Self::Sect
                | Self::Div
                | Self::P
                | Self::H
                | Self::H1
                | Self::H2
                | Self::H3
                | Self::H4
                | Self::H5
                | Self::H6
                | Self::Table
                | Self::Figure
                | Self::Formula
        )
    }

    /// Returns the heading level (1 to 6), or `None` for non-heading types.
    ///
    /// `H` has no explicit level and is reported as level 1.
    pub fn heading_level(&self) -> Option<u8> {
        match self {
            Self::H | Self::H1 => Some(1),
            Self::H2 => Some(2),
            Self::H3 => Some(3),
            Self::H4 => Some(4),
            Self::H5 => Some(5),
            Self::H6 => Some(6),
            _ => None,
        }
    }

    /// Returns `true` for the list types `L`, `LI`, `Lbl` and `LBody`.
    pub fn is_list(&self) -> bool {
        matches!(self, Self::L | Self::LI | Self::Lbl | Self::LBody)
    }

    /// Returns `true` only for `WB`.
    pub fn is_word_break(&self) -> bool {
        matches!(self, Self::WB)
    }

    /// Returns the Markdown line prefix for this type, if it has one.
    ///
    /// Headings yield one `#` per level followed by a space, and `Lbl`
    /// yields `"- "`. Every other type yields `None`.
    pub fn markdown_prefix(&self) -> Option<&'static str> {
        match self {
            // `H` is a heading that `heading_level` reports as level 1, so it
            // shares the level-1 marker instead of being emitted unmarked.
            Self::H | Self::H1 => Some("# "),
            Self::H2 => Some("## "),
            Self::H3 => Some("### "),
            Self::H4 => Some("#### "),
            Self::H5 => Some("##### "),
            Self::H6 => Some("###### "),
            Self::Lbl => Some("- "),
            _ => None,
        }
    }
}
/// Reverse-lookup table from marked content to its parent entry.
#[derive(Debug, Clone)]
pub struct ParentTree {
    /// Two-level map: page -> (marked-content id -> parent entry).
    pub page_mappings: HashMap<u32, HashMap<u32, ParentTreeEntry>>,
}
impl ParentTree {
    /// Creates a parent tree with no page mappings.
    pub fn new() -> Self {
        let page_mappings = HashMap::new();
        Self { page_mappings }
    }

    /// Looks up the parent entry recorded for `mcid` on `page`.
    ///
    /// Returns `None` when the page has no mappings or the page has no entry
    /// for `mcid`.
    pub fn get_parent(&self, page: u32, mcid: u32) -> Option<&ParentTreeEntry> {
        let per_page = self.page_mappings.get(&page)?;
        per_page.get(&mcid)
    }
}
impl Default for ParentTree {
fn default() -> Self {
Self::new()
}
}
/// Value stored in a [`ParentTree`] for one marked-content id.
#[derive(Debug, Clone)]
pub enum ParentTreeEntry {
    /// The parent structure element itself, stored on the heap.
    StructElem(Box<StructElem>),
    /// A reference to the parent; presumably `(object number, generation
    /// number)` — verify against the parser.
    ObjectRef(u32, u16),
}
/// Document-level marking flags; all three default to `false`.
///
/// NOTE(review): the fields look like the `/Marked`, `/Suspects` and
/// `/UserProperties` entries of a PDF mark information dictionary — confirm
/// against the parser that fills them in.
#[derive(Debug, Clone, Default)]
pub struct MarkInfo {
    /// Whether the document is flagged as marked (tagged).
    pub marked: bool,
    /// Whether the document is flagged as containing suspect tagging.
    pub suspects: bool,
    /// Whether the document is flagged as using user properties.
    pub user_properties: bool,
}
impl MarkInfo {
    /// Creates a `MarkInfo` with every flag cleared (same as `Default`).
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Returns `true` when the document is marked and not flagged as
    /// containing suspects.
    pub fn is_structure_reliable(&self) -> bool {
        // A suspects flag overrides the marked flag.
        if self.suspects {
            return false;
        }
        self.marked
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Known names resolve to their own variants; an unknown name is kept
    /// verbatim in `Custom`.
    #[test]
    fn test_struct_type_parsing() {
        let known = [
            ("P", StructType::P),
            ("H1", StructType::H1),
            ("Document", StructType::Document),
        ];
        for (name, expected) in known.iter() {
            assert_eq!(&StructType::from_str(name), expected);
        }
        assert_eq!(
            StructType::from_str("CustomType"),
            StructType::Custom("CustomType".to_string())
        );
    }

    /// Heading variants are recognised; other types are not.
    #[test]
    fn test_is_heading() {
        for ty in [StructType::H1, StructType::H2, StructType::H].iter() {
            assert!(ty.is_heading(), "{:?} should be a heading", ty);
        }
        for ty in [StructType::P, StructType::Document].iter() {
            assert!(!ty.is_heading(), "{:?} should not be a heading", ty);
        }
    }

    /// Paragraphs, headings and the document are blocks; inline types are not.
    #[test]
    fn test_is_block() {
        for ty in [StructType::P, StructType::H1, StructType::Document].iter() {
            assert!(ty.is_block(), "{:?} should be a block", ty);
        }
        for ty in [StructType::Span, StructType::Link].iter() {
            assert!(!ty.is_block(), "{:?} should not be a block", ty);
        }
    }

    /// Each heading reports its level; `H` counts as level 1.
    #[test]
    fn test_heading_level() {
        let expectations = [
            (StructType::H, Some(1)),
            (StructType::H1, Some(1)),
            (StructType::H2, Some(2)),
            (StructType::H3, Some(3)),
            (StructType::H4, Some(4)),
            (StructType::H5, Some(5)),
            (StructType::H6, Some(6)),
            (StructType::P, None),
            (StructType::Document, None),
        ];
        for (ty, level) in expectations.iter() {
            assert_eq!(ty.heading_level(), *level, "level of {:?}", ty);
        }
    }

    /// The four list types are lists; paragraphs, headings and tables are not.
    #[test]
    fn test_is_list() {
        let lists = [
            StructType::L,
            StructType::LI,
            StructType::Lbl,
            StructType::LBody,
        ];
        for ty in lists.iter() {
            assert!(ty.is_list(), "{:?} should be a list type", ty);
        }
        for ty in [StructType::P, StructType::H1, StructType::Table].iter() {
            assert!(!ty.is_list(), "{:?} should not be a list type", ty);
        }
    }

    /// Numbered headings and list labels have a prefix; other types have none.
    #[test]
    fn test_markdown_prefix() {
        let expectations = [
            (StructType::H1, Some("# ")),
            (StructType::H2, Some("## ")),
            (StructType::H3, Some("### ")),
            (StructType::H4, Some("#### ")),
            (StructType::H5, Some("##### ")),
            (StructType::H6, Some("###### ")),
            (StructType::Lbl, Some("- ")),
            (StructType::P, None),
            (StructType::Table, None),
        ];
        for (ty, prefix) in expectations.iter() {
            assert_eq!(ty.markdown_prefix(), *prefix, "prefix of {:?}", ty);
        }
    }
}